/***************************************************************************************************
** WHAT: 	Compiling the database for the paper
			"Institutional mistrust and child vaccination coverage in Africa",
			by Nik Stoop, Kalle Hirvonen and Jean-François Maystadt.
			Published in BMJ:Global Health
			Open access through http://dx.doi.org/10.1136/bmjgh-2020-004595
			
** WHEN: 	April '21

** WHO:	 	Nik Stoop, nik.stoop@uantwerp.be
***************************************************************************************************/

* Start from an empty session and switch off output paging so the do-file runs unattended.
clear all
set more off

	* winsor2 (called in section A4) is a community-contributed command, not part of
	* official Stata. Check for it now so a missing installation stops the run here
	* rather than after all survey files have been processed.
	capture which winsor2
	if _rc {
		display as error "winsor2 not found: run -ssc install winsor2- and then re-run this do-file"
		exit 199
	}

	* Folder locations: replace each placeholder with a path on your machine.
	*DHS child recode data files:
	global dhsdata 	"ADD LOCATION OF FOLDER HERE"
	*temporary folder to store data in:
	global temp 	"ADD LOCATION OF FOLDER HERE"
	*final data goes here:
	global datacons "ADD LOCATION OF FOLDER HERE"
	*AB data is stored here:
	global abdata 	"ADD LOCATION OF FOLDER HERE"
	*AB data temporary files go here:
	global abtemp 	"ADD LOCATION OF FOLDER HERE"
	*Do files are located here:
	global dofile 	"ADD LOCATION OF FOLDER HERE"

	
/*	
	NOTE: 	We are not allowed to share the DHS & AB data that our analysis relies on. 
			But the raw data are freely available to download on the DHS and AB websites
			see https://dhsprogram.com/ and https://afrobarometer.org
			This do-file constructs the final analytical database from the raw DHS & AB data.
*/
	

******************************************************************************************************
*A) Compile the DHS databases
******************************************************************************************************

	******************************************************************************************************
	*A1) Download the data
	******************************************************************************************************
	
	* From the DHS website (https://dhsprogram.com/), download all DHS surveys listed in Table A.1 in the online supplementary appendix
	
	******************************************************************************************************
	*A2) Fetch all child recode files & only keep interesting variables
	******************************************************************************************************

	* Loop over every DHS child recode file. For each file: load it, make sure every
	* variable used later exists (creating it as missing when the survey lacks it),
	* keep only those variables, harmonise the interview year, and save the result in
	* $temp as "<v000>_<raw minimum interview year>.dta" for pooling in section A3.
	foreach datafile of newlist AOKR71FL	BFKR21FL	BFKR31FL	BFKR43FL	BFKR62FL	BJKR31FL	BJKR41FL	BJKR51FL	BJKR61FL	BJKR71FL	BUKR01FL	BUKR61FL	BUKR70FL	CDKR50FL	CDKR61FL	CFKR31FL	CGKR51FL	CGKR60FL	CIKR3AFL	CIKR35FL	CIKR62FL	 		CMKR44FL	CMKR61FL	ETKR41FL	ETKR51FL	ETKR61FL	ETKR71FL	GAKR41FL	GAKR60FL	GHKR01FL	GHKR4BFL	GHKR5AFL	GHKR31FL	GHKR41FL	GHKR72FL	GNKR41FL	gnkr52fl	GNKR62FL	GNKR71FL	KEKR01FL	KEKR3AFL	KEKR31FL	KEKR42FL	KEKR52FL	KEKR72FL	KMKR31FL	KMKR61FL	LBKR01FL	LBKR6AFL	LBKR51FL	LSKR41FL	LSKR61FL	LSKR71FL	MDKR21FL	MDKR31FL	MDKR42FL	MDKR51FL	MLKR01FL	MLKR6AFL	MLKR7HFL	 	 	MLKR53FL	MWKR4EFL	MWKR7AFL	MWKR21FL	MWKR41FL	MWKR61FL	MZKR31FL	MZKR41FL	MZKR62FL	NGKR4BFL	NGKR6AFL	NGKR7AFL	 	NGKR53FL	 		NIKR51FL	NIKR61FL	NMKR21FL	NMKR41FL	NMKR51FL	NMKR61FL	OSKR01FL	RWKR5AFL	RWKR21FL	RWKR41FL	RWKR53FL	RWKR61FL	RWKR70FL	SDKR01FL	SLKR51FL	SLKR61FL	 	SNKR4HFL	SNKR6DFL	SNKR6RFL	SNKR7HFL	SNKR7IFL	SNKR7ZFL	SNKR21FL	SNKR32FL	SNKR61FL	SNKRG0FL	STKR50FL	SZKR51FL	 	TDKR41FL	TDKR71FL	TGKR01FL	TGKR31FL	TGKR61FL	TZKR3AFL TZKR4IFL	TZKR7BFL	TZKR21FL	TZKR41FL	TZKR63FL	 	UGKR7BFL	UGKR33FL	UGKR41FL	UGKR52FL	UGKR60FL	ZAKR31FL	ZAKR71FL	ZMKR21FL	ZMKR31FL	ZMKR42FL	ZMKR51FL	ZMKR61FL  ZMKR71FL	ZWKR01FL	ZWKR31FL	ZWKR42FL	ZWKR52FL	ZWKR62FL	ZWKR72FL  CMKR71FL {
	use "$dhsdata/`datafile'", clear

	*store the file name in case you want to check the original data later:
		* r(filename) is the file name without its path; the last 4 characters
		* (the extension) are cut off before storing it.
		_getfilename "`c(filename)'"
		di substr(r(filename),1,length(r(filename))-4)
		gen filename=substr(r(filename),1,length(r(filename))-4)
	 
	*keep variables of interest
		* Pattern used throughout: -capture gen X=.- does nothing when X already exists
		* and creates X as all-missing when the survey does not contain it, so that the
		* -keep- below and the later -append- always find the same set of variables.
		capture	gen	v000	=. // country code & phase
		capture	gen	v001	=. // cluster number
		capture	gen	v021	=. // primary sampling unit	
		capture	gen	v002	=. // household number
		capture	gen	v003	=. // respondent's line number
		capture	gen	v004	=. // ultimate area unit
		capture	gen	v005	=. // women's individual sample weight
		capture	gen	v006	=. // month of interview
		capture	gen	v007	=. // year of interview
		capture	gen	v008	=. // date of interview (cmc)
		capture	gen	v009	=. // respondent's month of birth
		capture	gen	v010	=. // respondent's year of birth 
		capture	gen	v011	=. // date of birth (cmc)
		capture	gen	v012	=. // respondent's current age
		capture	gen	v013	=. // age in 5-year groups
		capture	gen	v014	=. // completeness of age information
		capture	gen	v015	=. // result of individual interview
		capture	gen	v024	=. // region
		capture	gen	v025	=. // type of place of residence
		capture	gen	v101	=. // region
		capture	gen	v102	=. // type of place of residence
		capture	gen	v119	=. // household has: electricity
		capture	gen	v120	=. // household has: radio
		capture	gen	v121	=. // household has: television
		capture	gen	v122	=. // household has: refrigerator
		capture	gen	v123	=. // household has: bicycle
		capture	gen	v124	=. // household has: motorcycle/scooter
		capture	gen	v125	=. // household has: car/truck
		capture	gen	v127	=. // main floor material
		capture	gen	v128	=. // main wall material
		capture	gen	v129	=. // main roof material
		capture	gen	v130	=. // religion
							* String copy of the value labels: -decode- works when v130 carries
							* value labels; otherwise the second line creates an empty string.
							capture decode v130, gen(v130_str)
							capture gen str v130_str=""
		
		capture gen v104	=. // years living in that location
		
		capture gen b5		=. // child alive
		
		capture gen v136	=. // number of household members
		
		capture gen v467a	=. // getting medical help 
		capture gen v467b	=. // getting medical help 
		capture gen v467c	=. // getting medical help 
		capture gen v467d	=. // getting medical help 
		capture gen v467e	=. // getting medical help 
		capture gen v467f	=. // getting medical help 
		capture gen v467g	=. // getting medical help 
		capture gen v467h	=. // getting medical help 
		capture gen v467i	=. // getting medical help 
		capture gen v467j	=. // getting medical help 
		capture gen v467k	=. // getting medical help 
		capture gen v467l	=. // getting medical help 
		capture gen v467m	=. // getting medical help 
		
		capture gen m10		=. // child wanted at birth?
		
		capture gen m14		=. // nr antenatal visits
		capture gen m70		=. // postnatal visit within two months
		capture gen m72		=. // person who performed postnatal checkup
		capture gen m15		=. // place of delivery
								capture decode m15, gen(m15_str)
								capture gen str m15_str=""	

		capture gen v394	=. // visited health facility last 12 months
		
		capture	gen	v131	=. // ethnicity
		capture	gen	v133	=. // education in single years
		capture	gen	v151	=. // sex of household head
		capture	gen	v152	=. // age of household head
		capture	gen	v153	=. // household has: telephone (land-line)
		capture gen v169a	=. // owns a mobile phone // s122b s122c
		capture gen v190	=. // wealth index combined
		capture gen v190a	=. // wealth index for urban/rural
		capture gen v034	=. // line number of husband
		capture gen v715	=. // husband/partner's total nr of years of education
		
		capture gen v045c	=. // native language of respondent
		capture gen s121	=. // language usually spoken at home
		capture gen v155	=. // literacy
		capture gen v157	=. // freq read newspaper
		capture gen v158	=. // freq listen radio
		capture gen v159	=. // freq watching tv
		capture gen v501	=. // current marital status
		capture gen v217	=. // knowledge ovulatory cycle
		
		capture gen v466	=. // decision health kid
		capture gen v743a	=. // decision 
		capture gen v743b	=. // decision
		capture gen v743c	=. // decision 
		capture gen v743d	=. // decision 
		capture gen v743e	=. // decision 
		capture gen v743f	=. // decision 
		
		capture gen v212	=. // age of respondent at 1st birth
		capture gen v202	=. // sons at home
		capture gen v203	=. // daughters at home
		capture gen v137	=. // number of children 5 and under in household
		capture gen v201	=. // total children ever born
		capture gen v218	=. // number of living children
		capture gen v219	=. // number of living children + current pregnancy
		
		capture gen bord	=. // birth order number
		capture gen b11		=. // preceding birth interval
		capture gen b12		=. // succeeding birth interval
		capture gen b8		=. // current age of child
		capture gen b19		=. // current age of child in months (months since birth for dead children)
		capture gen b3		=. // date of birth (cmc)
		capture gen b4		=. // sex of child

		capture gen m1=.		// nr of tetanus injections before birth
		capture gen m2g=.		// prenatal: traditional birth attendant
		capture gen m3g=.		// assistance: traditional birth attendant
		
		* vaccination variables: hN = received, hNd/hNm/hNy = day/month/year
		capture gen	h1=. 		// has health card
		capture gen	h1a=.		// has health card and or other vaccination document
		capture gen	h2=.		// received bcg
		capture gen	h2d=.		// bcg day
		capture gen	h2m=.		// bcg month
		capture gen	h2y=.		// bcg year
		capture gen	h3=.		// received dpt1
		capture gen	h3d=.
		capture gen	h3m=.
		capture gen	h3y=.
		capture gen	h4=.		// received polio1
		capture gen	h4d=.
		capture gen	h4m=.
		capture gen	h4y=.
		capture gen	h5=.		// received dpt2
		capture gen	h5d=.
		capture gen	h5m=.
		capture gen	h5y=.
		capture gen	h6=.		// received polio2
		capture gen	h6d=.
		capture gen	h6m=.
		capture gen	h6y=.
		capture gen	h7=.		// received dpt3
		capture gen	h7d=.
		capture gen	h7m=.
		capture gen	h7y=.
		capture gen	h8=.		// received polio3
		capture gen	h8d=.
		capture gen	h8m=.
		capture gen	h8y=.
		capture gen	h9=.		// received measles1
		capture gen	h9d=.
		capture gen	h9m=.
		capture gen	h9y=.
		capture gen	h9a=.		// received measles2
		capture gen	h9ad=.
		capture gen	h9am=.
		capture gen	h9ay=.
		capture gen	h0=.		// received polio 0
		capture gen	h0d=.
		capture gen	h0m=.
		capture gen	h0y=.
		capture gen	h10=.		// ever had vaccination
		
		* survey-specific variables; section A4 uses them as the combination DPT
		* vaccine records for Uganda 2006, Zambia 2007 and Tanzania 2004-05
		capture gen sdh1=.		
		capture gen sdh2=.
		capture gen sdh3=.
		capture gen sdhh1=.
		capture gen sdhh2=.
		capture gen sdhh3=.	
		capture gen hb1=.		
		capture gen hb2=.
		capture gen hb3=.	
		
		* string copies of their value labels (empty string when no labels exist)
		capture decode sdh1, gen(sdh1_str)
		capture gen str sdh1_str=""
		capture decode sdh2, gen(sdh2_str)
		capture gen str sdh2_str=""
		capture decode sdh3, gen(sdh3_str)
		capture gen str sdh3_str=""
		capture decode sdhh1, gen(sdhh1_str)
		capture gen str sdhh1_str=""
		capture decode sdhh2, gen(sdhh2_str)
		capture gen str sdhh2_str=""
		capture decode sdhh3, gen(sdhh3_str)
		capture gen str sdhh3_str=""
		capture decode hb1, gen(hb1_str)
		capture gen str hb1_str=""
		capture decode hb2, gen(hb2_str)
		capture gen str hb2_str=""
		capture decode hb3, gen(hb3_str)
		capture gen str hb3_str=""
		
		* bed nets
		capture gen ml101=. 
		capture gen v459=.
		capture gen v460=. 
		capture gen v461=. 
		capture gen ml0=.
		
		* anemia
		capture gen hw57=.
		capture gen hw70=.
		capture gen hw71=.
		capture gen hw72=.
		capture gen hw73=.
		capture gen v457=.
		
		*h10: "ever had vaccination". Sometimes the value labels do not match across surveys. Therefore, let's create a string variable out of this using the value labels:
		capture decode h10, gen(h10_str)
		capture gen str h10_str=""
		
		*hemoglobin test variables 
		capture	gen	v042	=.	// household selected for hemoglobin
		capture gen v452c	=.	// read consent statement - hemoglobin
		capture	gen	v453	=.	// hemoglobin level
		capture	gen	v455	=.	// result of measurement - hemoglobin
		capture	gen	v456	=.	// hemoglobin level adjusted for altitude and smoking

		*v455 tells the outcome of the hemoglobin measurement. Sometimes the value labels do not match across surveys. Therefore, let's create a string variable out of this using the value labels:
		capture decode v455, gen(v455_str)
		capture gen str v455_str=""
		
		*hiv test variables 
		capture gen v473a	=.	// read consent statement - hiv
		capture gen v473b	=.	// result of measurement - hiv
		capture gen s1025	=.	// last hiv test result
		
		*v473b tells the outcome of the hiv measurement. Sometimes the value labels do not match across surveys. Therefore, let's create a string variable out of this using the value labels:
		capture decode v473b, gen(v473b_str)
		capture gen str v473b_str=""	
		
		*Region: Sometimes the value labels do not match across surveys. Therefore, let's create a string variable out of this using the value labels:
		* (not captured on purpose: region names are needed for the AB match in part B,
		*  so a file whose v101 cannot be decoded should stop the run)
		decode v101, generate(region)

		keep  	midx	v000	v001	v021  v002	 v003	v004	v005	v006	v007	v008	v009	v010	v011	v012	v013	v014	v015	v024	v025	v101	v102	v119	v120	v121	v122	v123	v124	v125	v127	v128	v129	v130	v131	v133	v151	v152	v153 h1	h1a	h2	h2d	h2m	h2y	h3	h3d	h3m	h3y	h4	h4d	h4m	h4y	h5	h5d	h5m	h5y	h6	h6d	h6m	h6y	h7	h7d	h7m	h7y	h8	h8d	h8m	h8y	h9	h9d	h9m	h9y	h9a	h9ad	h9am	h9ay	h0	h0d	h0m	h0y	h10 h10_str v042 v453 v455 v456 v455_str region filename v045c s121 v452c v473a v473b v473b_str s1025 bord b3 b4 b8 b19 b11 b12 v212 v202 v203 v137 v201 v218 v219 v190 v190a m1 v034 v715 v130_str v394 m2g m3g v136 v467* m14 m10 m15 m70 m72 v155 v157 v158 v159 v501 v217 v743* v466 m15_str ml101 v459 v460 v461 ml0  hw57 hw70 hw71 hw72 hw73 v457 b5 sdh1 sdh2 sdh3 sdh1_str sdh2_str sdh3_str  sdhh1 sdhh2 sdhh3 sdhh1_str sdhh2_str sdhh3_str hb1 hb2 hb3 hb1_str hb2_str hb3_str v104 v169a


	*construct cleaner interview year variables that are constant across each survey:
		* minyear is computed from the RAW v007, i.e. before the corrections below.
		* It only serves to build the output file name, and the names listed in
		* section A3 (e.g. "BF2_92", "GA3_0", "ET7_2008") rely on the raw value.
		egen minyear=min(v007)
		tostring minyear, replace
		* -local x=varname- picks up the value in the first observation
		local temp1=v000
		local temp2=minyear

		di "`temp1'"
		di "`temp2'"

		*fix interview year:
		* two-digit years -> four digits
		replace v007=1900+v007 if inrange(v007, 80,99)

		replace v007=2000 if v000=="MR3"
		replace v007=2000 if v000=="GA3"
		*ethiopia: shift the recorded year by +8.
		* ET7 is included here: section A3 expects the phase-7 file as "ET7_2008", so
		* its raw interview year is 2008 and needs the same shift as the earlier phases.
		replace v007=v007+8 if v000=="ET4" 
		replace v007=v007+8 if v000=="ET5" 
		replace v007=v007+8 if v000=="ET6" 
		replace v007=v007+8 if v000=="ET7" 

		* first and last (corrected) interview year of the survey, numeric and string
		egen   start_year=min(v007)
		egen   end_year=max(v007)
		tostring start_year, gen(start_year2)
		tostring end_year, gen(end_year2)

	*save these for data pooling: 
	save "$temp/`temp1'_`temp2'.dta", replace

	  }

 
	******************************************************************************************************
	*A3) Pool all data 
	******************************************************************************************************

	* Pooling: open the Angola file saved in A2, then stack every other survey file
	* from $temp underneath it, one survey at a time (output suppressed).
	use "$temp/AO7_2015.dta", clear
	quietly foreach survey in ///
		BF2_92 BF3_98 BF4_2003 BF6_2010 ///
		BJ3_96 BJ4_2001 BJ5_2006 BJ6_2011 BJ7_2017 ///
		BU_87 BU6_2010 BU7_2016 ///
		CD5_2007 CD6_2013 CF3_94 CG5_2005 CG6_2011 ///
		CI3_94 CI3_98 CI6_2011 CM4_2004 CM6_2011 ///
		ET4_1992 ET4_1997 ET6_2003 ET7_2008 ///
		GA3_0 GA6_2012 ///
		GH_88 GH2_93 GH3_98 GH4_2003 GH5_2008 GH6_2014 ///
		GN3_99 GN4_2005 GN6_2012 GN7_2018 ///
		KE_88 KE2_93 KE3_98 KE4_2003 KE5_2008 KE6_2014 ///
		KM3_96 KM6_2012 ///
		LB_86 LB5_2006 LB6_2013 ///
		LS4_2004 LS5_2009 LS6_2014 ///
		MD2_92 MD3_97 MD4_2003 MD5_2008 ///
		ML_87 ML5_2006 ML6_2012 ML7_2018 ///
		MW2_92 MW4_2000 MW4_2004 MW5_2010 MW7_2015 ///
		MZ3_97 MZ4_2003 MZ6_2011 ///
		NG4_2003 NG5_2008 NG6_2013 NG7_2018 ///
		NI5_2006 NI6_2012 ///
		NM2_92 NM4_2000 NM5_2006 NM6_2013 ///
		OS_86 ///
		RW2_92 RW4_2000 RW4_2005 RW5_2007 RW6_2010 RW6_2014 ///
		SD_89 SL5_2008 SL6_2013 ///
		SN2_92 SN2_97 SN4_2005 SN6_2010 SN6_2012 SN6_2015 SN6_2016 SN7_2017 ///
		ST5_2008 SZ5_2006 ///
		TD4_2004 TD6_2014 ///
		TG_88 TG3_98 TG6_2013 ///
		TZ2_91 TZ3_96 TZ3_99 TZ4_2004 TZ5_2009 TZ7_2015 ///
		UG3_95 UG4_2000 UG5_2006 UG6_2011 UG7_2016 ///
		ZA3_98 ZA7_2016 ///
		ZM2_92 ZM3_96 ZM4_2001 ZM5_2007 ZM6_2013 ZM7_2018 ///
		ZW_88 ZW3_94 ZW4_1999 ZW5_2005 ZW6_2010 ZW7_2015 ///
		CM7_2018 {
		append using "$temp/`survey'.dta"
	}
	

	******************************************************************************************************
	*A4) Create variables for the analysis
	****************************************************************************************************** 
	 
	* The first two characters of v000 are the DHS country code; v000 itself is kept
	* under the name "phase".
	generate countrycode = substr(v000, 1, 2)
	rename v000 phase

	* Spell out the country name for every DHS country code.
		generate str country = ""
		replace country = "Angola"                   if countrycode == "AO"
		replace country = "Benin"                    if countrycode == "BJ"
		replace country = "Botswana"                 if countrycode == "BT"
		replace country = "Burkina Faso"             if countrycode == "BF"
		replace country = "Burundi"                  if countrycode == "BU"
		replace country = "Cameroon"                 if countrycode == "CM"
		replace country = "Central African Republic" if countrycode == "CF"
		replace country = "Chad"                     if countrycode == "TD"
		replace country = "Comoros"                  if countrycode == "KM"
		replace country = "Congo"                    if countrycode == "CG"
		replace country = "Congo Democratic Republic" if countrycode == "CD"
		replace country = "Cote d'Ivoire"            if countrycode == "CI"
		replace country = "Egypt"                    if countrycode == "EG"
		replace country = "Eritrea"                  if countrycode == "ER"
		replace country = "Ethiopia"                 if countrycode == "ET"
		replace country = "Gabon"                    if countrycode == "GA"
		replace country = "Gambia"                   if countrycode == "GM"
		replace country = "Ghana"                    if countrycode == "GH"
		replace country = "Guinea"                   if countrycode == "GN"
		replace country = "Kenya"                    if countrycode == "KE"
		replace country = "Lesotho"                  if countrycode == "LS"
		replace country = "Liberia"                  if countrycode == "LB"
		replace country = "Madagascar"               if countrycode == "MD"
		replace country = "Malawi"                   if countrycode == "MW"
		replace country = "Mali"                     if countrycode == "ML"
		replace country = "Mauritania"               if countrycode == "MR"
		replace country = "Morocco"                  if countrycode == "MA"
		replace country = "Mozambique"               if countrycode == "MZ"
		replace country = "Namibia"                  if countrycode == "NM"
		replace country = "Niger"                    if countrycode == "NI"
		replace country = "Nigeria"                  if countrycode == "NG"
		replace country = "Rwanda"                   if countrycode == "RW"
		replace country = "Sao Tome and Principe"    if countrycode == "ST"
		replace country = "Senegal"                  if countrycode == "SN"
		replace country = "Sierra Leone"             if countrycode == "SL"
		replace country = "South Africa"             if countrycode == "ZA"
		replace country = "Sudan"                    if countrycode == "SD"
		replace country = "Swaziland"                if countrycode == "SZ"
		replace country = "Tanzania"                 if countrycode == "TZ"
		replace country = "Togo"                     if countrycode == "TG"
		replace country = "Tunisia"                  if countrycode == "TN"
		replace country = "Uganda"                   if countrycode == "UG"
		replace country = "Zambia"                   if countrycode == "ZM"
		replace country = "Zimbabwe"                 if countrycode == "ZW"
		replace country = "Cape Verde"               if countrycode == "CV"
		replace country = "Equatorial Guinea"        if countrycode == "EK"
		replace country = "Nigeria (Ondo State)"     if countrycode == "OS"

		* Stop if any observation carries a code that is not listed above, then build
		* a labelled numeric country identifier from the names.
		assert country != ""
		encode country, generate(country_id)


	* some HH characteristics
		* urban dummy (v102==1). Left missing when v102 is missing, so that surveys or
		* observations without this information are not silently counted as rural.
		tab v102
		gen urban=(v102==1) if !missing(v102)
		label var urban "urban cluster" 	
		
		* wealth index (combined)
		tab v190
		rename v190 wealth
		
		* number of children aged 5 or under, plus a copy winsorized at the 99.8th percentile
		tab v137
		rename v137 nr_under5 // 0.2% of observations > 10 ... check this in detail later
		label var nr_under5 "nr. children <=5 years in HH"
		winsor2 nr_under5, suffix(_w) c(0 99.8)
		label var nr_under5_w "nr. children <=5 years in HH (winsorized at 99.8%)"
		
		* household size, plus a copy winsorized at the 99th percentile
		tab v136
		rename v136 nr_hh
		label var nr_hh "nr. hh members"
		winsor2 nr_hh, suffix(_w) c(0 99)
		label var nr_hh_w "nr. hh members (winsorized at 99%)"
		
		* interview year (as corrected in section A2) and cluster number
		tab v007
		rename v007 year
		
		tab v001
		rename v001 cluster
		
		* group identifiers: household, cluster within country, survey (country x
		* first/last interview year), cluster within survey, region within survey
		egen hhid = group (country_id cluster v002 year) 
				
		egen country_cluster=group(country_id cluster)
		
		egen country_year=group(country_id start_year end_year),label
		
		egen country_cluster_year=group(country_id start_year end_year cluster)
		
		egen country_region_year=group(country_id start_year end_year v101)
		
		* Child's year of birth, derived from the century-month code of the birth date.
		* NOTE(review): b3 is used as recorded; the +8 year shift applied to the
		* Ethiopian interview years in section A2 is not applied here - confirm intended.
		generate yob = int((b3 - 1) / 12) + 1900

		* Mother's current age and her age at first birth.
		tab v012
		rename v012 age_mother
		label var age_mother "age of mother"

		tab v212
		rename v212 age_mother_1stbirth
		label var age_mother_1stbirth "age of mother at 1st birth"

		* Years of schooling of mother and father; values of 96 and above (and
		* missing values) are left missing.
		tab v133
		generate school_mother = v133 if v133 < 96
		tab v715
		generate school_father = v715 if v715 < 96
		label var school_mother "years of schooling mother"
		label var school_father "years of schooling father"

		* Literacy: 1 when v155 is 2, 0 when v155 is 0 or 1, missing for any other value.
		tab v155
		tab school_mother if v155 == .
		tab v155 if school_mother > 5
		generate literate = (v155 == 2) if inlist(v155, 0, 1, 2)
		label var literate "mother can read"
		
				
		* Religion of the mother, harmonised into 7 categories from the decoded value
		* labels (v130_str), because the numeric codes of v130 differ between surveys.
		* The -replace- statements below are order-dependent: later lines overwrite
		* earlier ones.
		tab v130
		* NOTE(review): -browse- opens the Data Browser; it is for interactive inspection
		* only and may need to be commented out for unattended runs.
		browse v130 v130_str if v130_str=="no religion" | v130_str=="none" 
		tab v130_str if v130==1
			// seems like codes are not always same across countries
			// work with the string variable
			replace v130_str=lower(v130_str)
			tab v130_str
			* helper strings used for prefix matching: x and xx are declared as str5 and
			* str9 copies of the label, xxx holds 5 characters starting at position 2
			gen str5 x=v130_str
			gen str9 xx=v130_str
			gen xxx=substr(v130_str,2,5)
			
			gen religion_mother=.
			* 1 = catholic
			replace religion_mother=1 if x=="catho" | x=="roman"
			* 2 = islam (several spellings)
			replace religion_mother=2 if x=="islam" | x=="mosle" | x=="musli" | x=="muslm" | x=="muslu" | x=="musul"
			* 3 = protestant
			replace religion_mother=3 if x=="prote" | v130_str=="prostestant" | xx=="other pro" | x=="presb" | x=="ccap" | x=="metho"
			
			* 4 = other christian religions
			replace religion_mother=4 if x=="chist" | x=="chris" | xx=="other chr" | v130_str=="orthodox" | x=="jehov" | x=="jeova" | x=="adven" | x=="7th d" | x=="avent" | x=="celes" | x=="eglis" | x=="chari" | x=="evang" | x=="kiban" | x=="kimba" | x=="pente" | x=="seven" |x=="lesot" | x=="apost" | x=="angli" | x=="assem" | x=="elcin" | xxx=="celes"
			* 5 = traditional religion
			replace religion_mother=5 if xx=="taditiona" | xx=="tradition" | x=="anima" | x=="animi" | v130_str=="vodoun" | xx=="other tra" | x=="spiri"
			* 7 = no religion
			replace religion_mother=7 if xx=="no religi" | x=="none" | v130_str=="aucune" | v130_str=="sans" | x=="athei"
			* 6 = other religions
			replace religion_mother=6 if v130_str=="autres" | xx=="other rel" | xx=="other" | xx=="other (ou"
			* residual: every remaining non-empty answer except "not responded" gets code 7
			* NOTE(review): code 7 is labelled "no religion" below; confirm that 6
			* ("other religions") was not the intended code for unmatched answers.
			replace religion_mother=7 if religion_mother==. & v130_str!="" & v130_str!="not responded"
			
			label define rel 1"catholic" 2"islam" 3"protestant" 4"other christian religions" 5"traditional religion" 6"other religions" 7"no religion" 
			label values religion_mother rel 
			* NOTE(review): xx is listed twice and xxx is not dropped here; xxx is removed
			* by the final -drop- list before the DHS file is saved.
			drop x xx xx
			sort religion_mother v130_str
			* interactive check of the mapping (see the note on -browse- above)
			browse religion_mother v130_str
			
		
		* Sex of the household head: turn code 2 into 0, so that 1 = male and
		* 0 = female, attach the labels and rename.
		tab v151
		replace v151 = 0 if v151 == 2
		label define sex 0"female" 1"male"
		label values v151 sex
		rename v151 sex_head

		* Visited a health facility in the last 12 months; code 9 is set to missing.
		tab v394
		replace v394 = . if v394 == 9
		rename v394 visit_12m

		* Refusal of the hemoglobin measurement: 1 when the (lower-cased) outcome label
		* is "refused", 0 for any other non-empty outcome, missing when it is empty.
		tab v455_str
		replace v455_str = lower(v455_str)
		generate refuse_hemo = (v455_str == "refused") if v455_str != ""

		* Refusal of the HIV measurement, built the same way.
		tab v473b_str
		replace v473b_str = lower(v473b_str)
		generate refuse_hiv = (v473b_str == "refused") if v473b_str != ""

		* Trust measure: 1 when either test was refused, 0 when at least one test
		* outcome is known and neither was refused, missing when both are unknown.
		generate refuse_test = (refuse_hiv == 1 | refuse_hemo == 1) if refuse_hemo != . | refuse_hiv != .

	* Sex of the child (code 2 becomes 0; reuses the "sex" value label) and current age
		rename b4 sex
		replace sex = 0 if sex == 2
		label values sex sex

		rename b8 age
	
	* Obstacles to getting medical help (v467a-v467g).
	* Each dummy is 1 when the source variable equals 1, 0 for any other value
	* below 9, and missing for values of 9 and above (which includes missing values).
		tab v467a
			generate problem_where = (v467a == 1) if v467a < 9
			label var problem_where "getting medical help for self - big problem: know where to go"

		tab v467b
			generate problem_permission = (v467b == 1) if v467b < 9
			label var problem_permission "getting medical help for self - big problem: get permission to go"

		tab v467c
			generate problem_money = (v467c == 1) if v467c < 9
			label var problem_money "getting medical help for self - big problem: money for treatment"

		tab v467d
			generate problem_distance = (v467d == 1) if v467d < 9
			label var problem_distance "getting medical help for self - big problem: distance to health facility"

		tab v467e
			generate problem_transport = (v467e == 1) if v467e < 9
			label var problem_transport "getting medical help for self - big problem: having to take transport"

		tab v467f
			generate problem_alone = (v467f == 1) if v467f < 9
			label var problem_alone "getting medical help for self - big problem: not wanting to go alone"

		tab v467g
			generate problem_nowoman = (v467g == 1) if v467g < 9
			label var problem_nowoman "getting medical help for self - big problem: no female health provider"


	* vaccination		
		* Every antigen dummy is built the same way from its DHS h-variable:
		*   0 when the h-variable is 0, 1 when it is 1, 2 or 3, missing otherwise,
		*   and always missing for children who are not alive (b5==0).
		
		* BCG
		tab h2,m
		gen bcg=.
		replace bcg=0 if h2==0
		replace bcg=1 if h2==1|h2==2|h2==3
		replace bcg=. if b5==0
		label variable bcg "received bcg"
		
		* DPT
		tab h3
		gen dpt1=.
		replace dpt1=0 if h3==0
		replace dpt1=1 if h3==1|h3==2|h3==3
		replace dpt1=. if b5==0	
		label variable dpt1 "received dpt1"

		tab h5
		gen dpt2=.
		replace dpt2=0 if h5==0
		replace dpt2=1 if h5==1|h5==2|h5==3
		replace dpt2=. if b5==0		
		label variable dpt2 "received dpt2"
		
		tab h7
		gen dpt3=.
		replace dpt3=0 if h7==0
		replace dpt3=1 if h7==1|h7==2|h7==3
		replace dpt3=. if b5==0		
		label variable dpt3 "received dpt3"
		
		* Combination DPT vaccines for Uganda 2006 & Zambia 2007 & Tanzania 2004-05
		* A dose counts as received when any of the survey-specific variables is 1, 2 or 3.
		* The b5!=0 condition keeps these overrides consistent with the rule above:
		* without it, a child who is not alive would be switched back from missing to 1.
		* NOTE(review): only "received" is taken from these variables; a value of 0 does
		* not set the dummy to 0. Confirm that h3/h5/h7 hold the zeros in these surveys.
		replace dpt1=1 if (sdh1>0 & sdh1<4 | sdhh1>0 & sdhh1<4 | hb1>0 & hb1<4) & b5!=0
		replace dpt2=1 if (sdh2>0 & sdh2<4 | sdhh2>0 & sdhh2<4 | hb2>0 & hb2<4) & b5!=0
		replace dpt3=1 if (sdh3>0 & sdh3<4 | sdhh3>0 & sdhh3<4 | hb3>0 & hb3<4) & b5!=0
	

		* Polio
		tab h0
		gen polio0=.
		replace polio0=0 if h0==0
		replace polio0=1 if h0==1|h0==2|h0==3
		replace polio0=. if b5==0
		label variable polio0 "received polio0"	
		
		tab h4
		gen polio1=.
		replace polio1=0 if h4==0
		replace polio1=1 if h4==1|h4==2|h4==3
		replace polio1=. if b5==0			
		label variable polio1 "received polio1"
		
		tab h6
		gen polio2=.
		replace polio2=0 if h6==0
		replace polio2=1 if h6==1|h6==2|h6==3
		replace polio2=. if b5==0				
		label variable polio2 "received polio2"
		
		tab h8
		gen polio3=.
		replace polio3=0 if h8==0
		replace polio3=1 if h8==1|h8==2|h8==3
		replace polio3=. if b5==0			
		label variable polio3 "received polio3"
		
		* Measles
		tab h9
		gen measles=.
		replace measles=0 if h9==0
		replace measles=1 if h9==1|h9==2|h9==3
		replace measles=. if b5==0			
		label variable measles "received measles"

		* Full immunization
		* 1 when all eight doses (bcg, polio1-3, measles, dpt1-3) were received, 0 when
		* at least one was not, missing when any of the eight dummies is missing.
		* polio0 is not part of the definition.
		gen y1_full=.
		replace y1_full=0
		replace y1_full=1 if bcg==1 & polio1==1 & polio2==1 & polio3==1 & measles==1 & dpt1==1 & dpt2==1 & dpt3==1
		replace y1_full=. if bcg==. | polio1==. | polio2==. | polio3==. | measles==. | dpt1==. | dpt2==. | dpt3==.
		label var y1_full "full immunization"
		
		* Has not received a single vaccination
		* 1 when none of the eight doses was received; same missing-value rule as above.
		gen y1_none=.
		replace y1_none=0
		replace y1_none=1 if bcg==0 & polio1==0 & polio2==0 & polio3==0 & measles==0 & dpt1==0 & dpt2==0 & dpt3==0
		replace y1_none=. if bcg==. | polio1==. | polio2==. | polio3==. | measles==. | dpt1==. | dpt2==. | dpt3==.
		label var y1_none "no vaccinations"
		

	* birth order categories
		* according to some studies, birth order as well as time between births matters for vaccination
		* the definitions of these categories are taken from a study by Antai (2010)
		* Antai, D., 2010. Migration and child immunization in Nigeria: individual-and community-level contexts. BMC Public Health 10, 116.

		* Categories combine birth order (bord) with the preceding birth interval (b11):
		*   1       first birth
		*   2/3/4   birth order 2-4, interval <=24 / 25-47 / >=48
		*   5/6/7   birth order 5+,  interval <=24 / 25-47 / >=48
		* Stata treats missing values as larger than any number, so "b11>=48" and
		* "bord>=5" are true for missing b11 / bord. The !missing() conditions keep
		* such observations out of categories 4-7; they stay missing instead.
		tab bord
		tab b11
		gen birthcat=.
		replace birthcat=1 if bord==1
		replace birthcat=2 if inlist(bord,2,3,4) & b11<=24
		replace birthcat=3 if inlist(bord,2,3,4) & (b11>=25&b11<=47)
		replace birthcat=4 if inlist(bord,2,3,4) & (b11>=48 & !missing(b11))
		replace birthcat=5 if (bord>=5 & !missing(bord)) & b11<=24
		replace birthcat=6 if (bord>=5 & !missing(bord)) & (b11>=25&b11<=47)
		replace birthcat=7 if (bord>=5 & !missing(bord)) & (b11>=48 & !missing(b11))
		label variable birthcat "birth order and interval categories"
		* (closing parenthesis added to the text of label 4)
		label define birthcat 1"bord==1" 2"(bord==2|bord==3|bord==4)&(b11<=24)" 3"(bord==2|bord==3|bord==4)&(b11>=25&b11<=47)" 4"(bord==2|bord==3|bord==4)&(b11>=48)" 5"(bord>=5)&(b11<=24)" 6"(bord>=5)&(b11>=25&b11<=47)" 7"(bord>=5)&(b11>=48)"
		label values birthcat birthcat

		* final variable labels; the last two replace the longer "winsorized" labels
		* given to nr_under5_w and nr_hh_w earlier
		label var sex "child is male"
		label var sex_head "household head is male"
		label var refuse_test "hiv/anemia test refusal"
		label var age "current age of child in years"
		label var nr_under5_w "nr. children <=5 years in hh"
		label var nr_hh_w "nr. hh members"	
	  
	* 	Dummy for mothers who did not move in the past 5 years, based on v104:
	*	0 when v104 is below 5; 1 when v104 is above 4 and below 90, or equal to 95;
	*	missing for every other value.
		tab v104
		generate nomigrant = .
		replace nomigrant = 0 if v104 < 5
		replace nomigrant = 1 if (v104 > 4 & v104 < 90) | v104 == 95

	* 	Survey weight: v005 divided by one million
	*	(useful when we compare the trust variable to the Afrobarometer)
		generate wgt = v005 / 1000000
		
	 
	*	unique identifiers:
		* stops with an error unless these variables jointly identify every observation;
		* also sorts the data by them
		isid cluster v002 v003 midx country start_year end_year, sort
		qui compress
		* drop value labels that are no longer attached to any variable.
		* -label drop- without a label name is a syntax error, so only call it when
		* labelbook actually reported unused labels.
		qui labelbook, problems
		if "`r(notused)'" != "" {
			la drop `r(notused)'
		}
		label data "data for the vaccination paper (DHS - sub-saharan african countries)"
		* remove raw and helper variables that are not needed in the analysis file.
		* The ranges (a-b) depend on the current variable order in the dataset.
		drop v119-v129 v131 v153 v157-v169a ml101 v202 v203 v217 v452c-v466 v473a v473b v501 v743a-v743f b11-m72 h2-s1025 m15_str-hb3_str v455_str v473b_str xxx v130 v133 v190a v467a-bord
		
		save "$datacons/vaccination_data_dhs.dta", replace
	 
	 
******************************************************************************************************
*B) Compile the Afrobarometer databases; correct region names to match with DHS; merge AB & DHS
******************************************************************************************************

	******************************************************************************************************
	*B1) Download the AB data
	******************************************************************************************************
	 
	* From the Afrobarometer website (https://afrobarometer.org), download all AB surveys listed in Table A.7 in the online supplementary appendix
		

	******************************************************************************************************
	/*B2)- correct region names in DHS and AB databases so that they match
		 - based on visual comparison (names written slightly differently, accents, etc.)
		 - based on comparison DHS region boundary maps (https://spatialdata.dhsprogram.com/boundaries/#view=table&countryId=AF)
	*/	
	******************************************************************************************************

	*** DHS
		* Round-trip the DHS file through CSV (dta -> csv -> dta) to deal with some of
		* the weird characters, and store the result as a second file.
		use "$datacons/vaccination_data_dhs.dta", clear
		export delimited using "$datacons/vaccination_data_dhs.csv", replace
		import delimited "$datacons/vaccination_data_dhs.csv", clear
		save "$datacons/vaccination_data_dhs2.dta", replace

		use "$datacons/vaccination_data_dhs2.dta", clear

		* Only the identifying variables are kept here. The CSV round-trip loses the
		* (value) labels, so this file is matched back to the original database later.
		keep cluster v002 v003 midx country start_year end_year region countrycode

		* Remove the countries that are not in the AB database
		* (see excel file "matching DHS AB regions").
		replace countrycode = lower(countrycode)
		local var "ao cf td km cg cd et os rw"
		foreach cc of local var {
			drop if countrycode == "`cc'"
		}

		* Keep the original region name, then lower-case country and region before the
		* name corrections that follow (see excel file "ab_dhs_regions_corr_commands").
		generate region_orig = region
		replace country = lower(country)
		replace region = lower(region)
		{
			* Harmonise DHS region spellings, first batch, grouped by country.
			* Each statement rewrites region only where country and the raw spelling both match;
			* several raw spellings that share one target are handled with inlist().

			* Benin
			replace region = "atlantic" if country == "benin" & region == "atlantique"
			replace region = "oueme" if country == "benin" & inlist(region, "ouémé", "quémé")

			* Burkina Faso
			replace region = "central/south" if country == "burkina faso" & region == "central /south"
			replace region = "centre" if country == "burkina faso" & region == "centre (sans ouagadougou)"
			replace region = "centre est" if country == "burkina faso" & region == "centre-est"
			replace region = "centre nord" if country == "burkina faso" & region == "centre-nord"
			replace region = "centre ouest" if country == "burkina faso" & region == "centre-ouest"
			replace region = "centre sud" if country == "burkina faso" & region == "centre-sud"
			replace region = "nord" if country == "burkina faso" & region == "north"

			* Cameroon
			replace region = "extreme nord" if country == "cameroon" & region == "extrême-nord"
			replace region = "nord ouest" if country == "cameroon" & region == "nord-ouest"
			replace region = "sud ouest" if country == "cameroon" & region == "sud-ouest"
			replace region = "yaounde" if country == "cameroon" & region == "yaoundé"

			* Gabon
			replace region = "estuaire & moyen ogooue & ogooue maritime" if country == "gabon" & region == "west (estuaire, moyen-ogoou & ogoou-maritime)"
			replace region = "haut ogooue" if country == "gabon" & region == "haut-ogooué"
			replace region = "haut ogooue" if country == "gabon" & region == "east (haut-ogoou & ogoou-lolo)"
			replace region = "libreville port gentil" if country == "gabon" & region == "libreville-port-gentil"
			replace region = "libreville port gentil" if country == "gabon" & region == "libreville,port-gentil"
			replace region = "moyen ogooue" if country == "gabon" & region == "moyen-ogooué"
			replace region = "ngounie" if country == "gabon" & region == "ngounié"
			replace region = "ngounie & nyanga" if country == "gabon" & region == "south (ngouni, nyanga)"
			replace region = "ogooue ivindo" if country == "gabon" & region == "ogooué-ivindo"
			replace region = "ogooue ivindo & woleu ntem" if country == "gabon" & region == "north (ogoou-ivindo & woleu-ntem)"
			replace region = "ogooue lolo" if country == "gabon" & region == "ogooué-lolo"
			replace region = "ogooue maritime" if country == "gabon" & region == "ogooué maritime"

			* Ghana
			replace region = "ashanti" if country == "ghana" & region == "ashanti region"
			replace region = "brong ahafo" if country == "ghana" & inlist(region, "brong-ahafo", "brong ahafo region")
			replace region = "central" if country == "ghana" & region == "central region"
			replace region = "eastern" if country == "ghana" & region == "eastern region"
			replace region = "greater accra" if country == "ghana" & region == "greater accra region"
			replace region = "northern" if country == "ghana" & region == "northern region"
			replace region = "upper east" if country == "ghana" & region == "upper east region"
			replace region = "upper west" if country == "ghana" & region == "upper west region"
			replace region = "upper west & upper east & northern" if country == "ghana" & region == "upper w,e & northern"
			replace region = "volta" if country == "ghana" & region == "volta region"
			replace region = "western" if country == "ghana" & region == "western region"

			* Guinea
			replace region = "boke" if country == "guinea" & region == "boké"
			replace region = "kankan" if country == "guinea" & region == "upper guinea" // region changed, but not included in final database
			replace region = "kindia" if country == "guinea" & region == "lower guinea" // region changed, but not included in final database
			replace region = "labe" if country == "guinea" & region == "labé"
			replace region = "nzerekore" if country == "guinea" & region == "n'zérékoré"
			replace region = "nzerekore" if country == "guinea" & region == "forest guinea" // region changed, but not included in final database

			* Lesotho
			replace region = "butha buthe" if country == "lesotho" & inlist(region, "butha-bothe", "botha-bothe")
			replace region = "qachas nek" if country == "lesotho" & inlist(region, "qasha's nek", "qacha's-nek")

			* Liberia
			replace region = "south eastern a" if country == "liberia" & region == "grand gedeh" // region changed, but not included in final database

			* Madagascar
			replace region = "amoroni mania" if country == "madagascar" & region == "anamoroni'i mania"
			replace region = "atsinanana" if country == "madagascar" & region == "atsimo atsinanana"
			replace region = "vatovavy fitonany" if country == "madagascar" & region == "vatovavy fitovinany"

			* Malawi
			replace region = "central" if country == "malawi" & region == "central region"
			replace region = "northern" if country == "malawi" & inlist(region, "north", "northern region")
			replace region = "southern" if country == "malawi" & inlist(region, "south", "southern region")

			* Mali
			replace region = "kayes & koulikoro" if country == "mali" & region == "kayes,koulikoro"
			replace region = "mopti gao tombouctou" if country == "mali" & region == "mopti,gao,tomboucto"
			replace region = "sikasso segou" if country == "mali" & region == "sikasso,segou"
			replace region = "tombouctou" if country == "mali" & region == "toumbouctou"

			* Mozambique
			replace region = "maputo cidade" if country == "mozambique" & region == "cidade de maputo"
			replace region = "maputo province" if country == "mozambique" & region == "maputo provincia"
			replace region = "zambezia" if country == "mozambique" & region == "zambzia"

			* Niger
			replace region = "tillaberi" if country == "niger" & region == "tillabéri"

			* Sao Tome and Principe
			replace region = "principe" if country == "sao tome and principe" & region == "região do principe" // region changed, but not included in final database
			replace region = "sao tome" if country == "sao tome and principe" & inlist(region, "região sul", "região norte", "região centro") // region changed, but not included in final database

			* Senegal
			replace region = "saint louis" if country == "senegal" & region == "saint-louis"
			replace region = "thies" if country == "senegal" & inlist(region, "thiès", "thi?s", "thiés")
			replace region = "ziguinchor" if country == "senegal" & region == "zuguinchor"

			* South Africa
			replace region = "kwazulu natal" if country == "south africa" & region == "kwazulu-natal"
			replace region = "western cape" if country == "south africa" & region == "western  cape"

			* Tanzania
			replace region = "coast" if country == "tanzania" & region == "coastal"
			replace region = "dar es salam" if country == "tanzania" & region == "dar es salaam"
			replace region = "north pemba" if country == "tanzania" & region == "pemba north"
			replace region = "rest of zanzibar" if country == "tanzania" & region == "rest zanzibar"
			replace region = "shinyanga" if country == "tanzania" & region == "shinyinga"
			replace region = "south pemba" if country == "tanzania" & region == "pemba south"
			replace region = "zanzibar south" if country == "tanzania" & region == "zanziba south"

			* Togo
			replace region = "kara" if country == "togo" & region == "de la kara"
			replace region = "lome" if country == "togo" & inlist(region, "grande agglomération de lomé", "lom")
			replace region = "maritime" if country == "togo" & inlist(region, "maritime (sans agglomération de lomé)", "marities")
			replace region = "plateaux" if country == "togo" & region == "des plateaux"
			replace region = "savanes" if country == "togo" & region == "des savanes"

			* Uganda
			replace region = "west nile" if country == "uganda" & region == "west-nile"

			* Zambia
			replace region = "north western" if country == "zambia" & inlist(region, "northwestern", "north-western")

			* Zimbabwe
			replace region = "harare chitungwiza" if country == "zimbabwe" & region == "harare /chitungwiza"
		
			* Second batch of spelling fixes (Benin to Gabon), grouped by country.
			* The "Ã©"/"Ãª" sequences are mis-encoded accented characters and are matched literally.

			* Benin
			replace region = "atlantic" if country == "benin" & region == "atlnatique"
			replace region = "oueme" if country == "benin" & region == "ouÃ©mÃ©"

			* Burkina Faso
			replace region = "boucle de mouhoun" if country == "burkina faso" & region == "boucle du mouhoun"
			replace region = "east" if country == "burkina faso" & region == "est"
			replace region = "hauts basins" if country == "burkina faso" & inlist(region, "hauts-bassins", "hauts bassins")
			replace region = "sud ouest" if country == "burkina faso" & region == "sud-ouest"

			* Burundi
			replace region = "bujumbura mairie" if country == "burundi" & region == "bujumbura marie"
			replace region = "cankuza" if country == "burundi" & region == "cankuzo"
			replace region = "ruyiga" if country == "burundi" & region == "ruyigi"

			* Cameroon
			replace region = "adamaoua" if country == "cameroon" & region == "adamawa"
			replace region = "centre" if country == "cameroon" & region == "centre-yaoundÃ©"
			replace region = "east" if country == "cameroon" & region == "est"
			replace region = "extreme nord" if country == "cameroon" & inlist(region, "extrÃªme-nord", "extreme-north")
			replace region = "nord ouest" if country == "cameroon" & region == "north west"
			replace region = "ouest" if country == "cameroon" & region == "west"

			* Gabon
			replace region = "haut ogooue" if country == "gabon" & region == "haut-ogoouÃ©"
			replace region = "moyen ogooue" if country == "gabon" & region == "moyen-ogoouÃ©"
			replace region = "ngounie" if country == "gabon" & region == "ngouniÃ©"
			replace region = "ogooue ivindo" if country == "gabon" & region == "ogoouÃ©-ivindo"
			replace region = "ogooue lolo" if country == "gabon" & region == "ogoouÃ©-lolo"
			replace region = "ogooue maritime" if country == "gabon" & region == "ogoouÃ©-maritime"
			replace region = "woleu ntem" if country == "gabon" & region == "woleu-ntem"
			replace region="boke" if region=="bokÃ©" & country=="guinea"
			replace region="labe" if region=="labÃ©" & country=="guinea"
			replace region="nzerekore" if region=="nâzÃ©rÃ©korÃ©" & country=="guinea"
			replace region="nzerekore" if region=="nzerekorÃ©" & country=="guinea"
			replace region="rift valley" if region=="rift-valley" & country=="kenya"
			replace region="butha buthe" if region=="butha-buthe" & country=="lesotho"
			replace region="butha buthe" if region=="buthe-buthe" & country=="lesotho"
			replace region="mohale hoek" if region=="mohaleâs hoek" & country=="lesotho"
			replace region="mohale hoek" if region=="mohale's hoek" & country=="lesotho"
			replace region="mohale hoek" if region=="mohale'hoek" & country=="lesotho"
			replace region="qachas nek" if region=="qachaâs nek" & country=="lesotho"
			replace region="qachas nek" if region=="qacha's nek" & country=="lesotho"
			replace region="thaba tseka" if region=="thaba-tseka" & country=="lesotho"
			* Second batch of spelling fixes, Liberia to Zimbabwe.
			* Each statement rewrites region only where country and the raw spelling both match.

			* Liberia: counties are collapsed into larger regions
			replace region="north central" if region=="nimba" & country=="liberia"
			replace region="north central" if region=="bong" & country=="liberia"
			replace region="north central" if region=="lofa" & country=="liberia"
			replace region="north western" if region=="grand cape mount" & country=="liberia"
			replace region="north western" if region=="bomi" & country=="liberia"
			replace region="north western" if region=="gbarpolu" & country=="liberia"
			replace region="south central" if region=="river cess" & country=="liberia"
			replace region="south central" if region=="margibi" & country=="liberia"
			replace region="south central" if region=="grand bassa" & country=="liberia"
			replace region="south central" if region=="montserrado" & country=="liberia"		// region changed, but not included in final database
			replace region="south central" if region=="rivercess" & country=="liberia"
			replace region="south eastern a" if region=="sinoe" & country=="liberia"			// region changed, but not included in final database
			replace region="south eastern b" if region=="river gee" & country=="liberia"
			replace region="south eastern b" if region=="grand kru" & country=="liberia"
			replace region="south eastern b" if region=="maryland" & country=="liberia"

			* Madagascar, Mali
			replace region="amoroni mania" if region=="amoron'i mania" & country=="madagascar"
			replace region="segou" if region=="sÃ©gou" & country=="mali"

			* Mozambique
			replace region="maputo cidade" if region=="maputo city" & country=="mozambique"
			replace region="maputo cidade" if region=="maputo (cid.)" & country=="mozambique"
			replace region="maputo province" if region=="maputo (prov.)" & country=="mozambique"
			replace region="maputo province" if region=="maputo" & country=="mozambique"
			replace region="zambezia" if region=="zambÃ©zia" & country=="mozambique"

			* Namibia, Niger, Nigeria, Senegal
			replace region="karas" if region=="!karas" & country=="namibia"
			replace region="otjozondjupa" if region=="otjozundjupa" & country=="namibia"
			replace region="tillaberi" if region=="tillabÃ©ri" & country=="niger"
			replace region="nasarawa" if region=="nassarawa" & country=="nigeria"
			replace region="kedougou" if region=="kÃ©dougou" & country=="senegal"
			replace region="sedhiou" if region=="sÃ©dhiou" & country=="senegal"

			* Sierra Leone
			replace region="eastern" if region=="eastern province" & country=="sierra leone"
			replace region="eastern" if region=="east" & country=="sierra leone"
			replace region="northern" if region=="northern province" & country=="sierra leone"
			replace region="northern" if region=="north" & country=="sierra leone"
			replace region="southern" if region=="southern province" & country=="sierra leone"
			replace region="southern" if region=="south" & country=="sierra leone"
			replace region="western" if region=="western area" & country=="sierra leone"

			* South Africa, Sudan
			replace region="mpumalanga" if region=="mpumulanga" & country=="south africa"
			replace region="eastern" if region=="east" & country=="sudan"
			replace region="khartoum" if region=="khartom" & country=="sudan"
			replace region="kordofan" if region=="kurdufan" & country=="sudan"

			* Tanzania
			replace region="coast" if region=="coast(pwani)" & country=="tanzania"
			replace region="dar es salam" if region=="dares salaam" & country=="tanzania"
			replace region="dar es salam" if region=="dar" & country=="tanzania"
			replace region="kilimanjaro" if region=="kilimajaro" & country=="tanzania"
			replace region="mtwara" if region=="mrwara" & country=="tanzania"
			replace region="north pemba" if region=="kaskazini pemba" & country=="tanzania"
			replace region="north pemba" if region=="pemba kaskazini" & country=="tanzania"
			replace region="north unguja" if region=="unguja kaskazini" & country=="tanzania"
			replace region="north unguja" if region=="kaskazini unguja" & country=="tanzania"
			replace region="south pemba" if region=="kusini pemba" & country=="tanzania"
			replace region="south pemba" if region=="pemba kusini" & country=="tanzania"
			* FIX: this rule used to map the misspelled "unfuja kusini" to the misspelled "south unfuja",
			* so it never produced the "south unguja" name used by the rule for "kusini unguja".
			* Both the correct and the misspelled raw value now end up as "south unguja".
			replace region="south unguja" if region=="unguja kusini" & country=="tanzania"
			replace region="south unguja" if region=="unfuja kusini" & country=="tanzania"
			replace region="south unguja" if region=="kusini unguja" & country=="tanzania"

			* Togo, Uganda, Zambia
			replace region="lome" if region=="lome commune" & country=="togo"
			replace region="lome" if region=="lomÃ© commune" & country=="togo"
			replace region="eastern" if region=="east" & country=="uganda"
			replace region="northern" if region=="north" & country=="uganda"
			replace region="western" if region=="west" & country=="uganda"
			replace region="copperbelt" if region=="copper belt" & country=="zambia"

			* Zimbabwe
			replace region="mashonaland central" if region=="mash central" & country=="zimbabwe"
			replace region="mashonaland east" if region=="mash east" & country=="zimbabwe"
			replace region="mashonaland west" if region=="mash west" & country=="zimbabwe"
			replace region="matabeleland north" if region=="matebeleland north" & country=="zimbabwe"
			replace region="matabeleland north" if region=="matebeland north" & country=="zimbabwe"
			replace region="matabeleland south" if region=="matebeland south" & country=="zimbabwe"
			replace region="matabeleland south" if region=="matebeleland south" & country=="zimbabwe"
			replace region="midlands" if region=="midland" & country=="zimbabwe"
		
		}	
		
		* Store the harmonised names as region_corr and save them keyed on the child identifiers.
		rename region region_corr
		sort cluster v002 v003 midx country start_year end_year
		save "$temp/dhs_regioncorr.dta",replace

		* merge back with original data 
		use "$datacons/vaccination_data_dhs.dta",clear
			
			* drop countries not in AB
			replace country=lower(country)
			replace countrycode=lower(countrycode)
			local var "ao cf td km cg cd et os rw"
			foreach v of local var{
			drop if countrycode=="`v'"
			}		
			
			* sort & merge
			* The match is expected to be perfect. assert(match) makes Stata stop with an error
			* if any observation is found in only one of the two files, instead of silently
			* keeping unmatched rows; nogenerate suppresses the _merge variable.
			sort cluster v002 v003 midx country start_year end_year
			merge 1:1 cluster v002 v003 midx country start_year end_year using "$temp/dhs_regioncorr.dta", assert(match) nogenerate
		
			save "$datacons/vaccination_data_dhs3.dta",replace

		
	*** AB
		
		* drop countries that are not in DHS
		* keep the identifying vars + vars related to trust for each AB round
		* export to csv and import again to deal with strange characters & correct region names
		* merge csv w corrected region names back to trust database for each round
		
		* For each AB round 2-7: build string country/region identifiers, drop the countries
		* without a DHS counterpart, save the trust/health/weight variables, and write the
		* identifiers out to csv and read them back in.
		forvalues x = 2/7 {
			use "$abdata/Round`x'/merged_r`x'_data.dta", clear

			* keep the labelled originals as countryc/regionc and decode them into
			* lower-case strings named country/region
			rename (country region) (countryc regionc)
			foreach id in country region {
				decode `id'c, generate(`id')
				replace `id' = lower(`id')
			}

			* keep only countries that are also in DHS database
			drop if inlist(country, "algeria", "botswana", "egypt", "mauritius", "morocco", "tunisia", "cape verde")

			* make sure country name is written in same way as in DHS
			replace country = "sao tome and principe" if country == "são tomé and príncipe"
			replace country = "cote d'ivoire" if country == "cote d’ivoire"

			* keep every variable whose label mentions trust, health or weight,
			* plus the identifiers and dateintr
			ds, has(varl *trust* *Trust* *health* *Health* *weight*)
			local keep1 (`r(varlist)')
			keep `keep1' respno country region dateintr
			save "$temp/r`x'_trust.dta", replace

			* keep only identifying info for csv. later merge with *_trust.dta
			keep respno country region
			export delimited using "$temp/r`x'.csv", replace
			import delimited "$temp/r`x'.csv", clear varnames(1)
			save "$temp/r`x'_2.dta", replace
		}
		
		
		* we want to match based on YOB in DHS, with different AB rounds
		* keep the AB rounds separate for now
		* correct the region names to match with DHS
		forvalues x=2(1)7{
		use "$temp/r`x'_2.dta", clear
		* keep the spelling read back from the csv before any correction
		generate region_orig = region

		* Harmonise AB region spellings with DHS, first batch, grouped by country.
		* Each statement rewrites region only where country and the raw spelling both match;
		* several raw spellings that share one target are handled with inlist().

		* Benin
		replace region = "atlantic" if country == "benin" & region == "atlantique"
		replace region = "oueme" if country == "benin" & inlist(region, "ouémé", "quémé")

		* Burkina Faso
		replace region = "central/south" if country == "burkina faso" & region == "central /south"
		replace region = "centre" if country == "burkina faso" & region == "centre (sans ouagadougou)"
		replace region = "centre est" if country == "burkina faso" & region == "centre-est"
		replace region = "centre nord" if country == "burkina faso" & region == "centre-nord"
		replace region = "centre ouest" if country == "burkina faso" & region == "centre-ouest"
		replace region = "centre sud" if country == "burkina faso" & region == "centre-sud"
		replace region = "nord" if country == "burkina faso" & region == "north"

		* Cameroon
		replace region = "extreme nord" if country == "cameroon" & region == "extrême-nord"
		replace region = "nord ouest" if country == "cameroon" & region == "nord-ouest"
		replace region = "sud ouest" if country == "cameroon" & region == "sud-ouest"
		replace region = "yaounde" if country == "cameroon" & region == "yaoundé"

		* Gabon
		replace region = "estuaire & moyen ogooue & ogooue maritime" if country == "gabon" & region == "west (estuaire, moyen-ogoou & ogoou-maritime)"
		replace region = "haut ogooue" if country == "gabon" & region == "haut-ogooué"
		replace region = "haut ogooue" if country == "gabon" & region == "east (haut-ogoou & ogoou-lolo)"
		replace region = "libreville port gentil" if country == "gabon" & region == "libreville-port-gentil"
		replace region = "libreville port gentil" if country == "gabon" & region == "libreville,port-gentil"
		replace region = "moyen ogooue" if country == "gabon" & region == "moyen-ogooué"
		replace region = "ngounie" if country == "gabon" & region == "ngounié"
		replace region = "ngounie & nyanga" if country == "gabon" & region == "south (ngouni, nyanga)"
		replace region = "ogooue ivindo" if country == "gabon" & region == "ogooué-ivindo"
		replace region = "ogooue ivindo & woleu ntem" if country == "gabon" & region == "north (ogoou-ivindo & woleu-ntem)"
		replace region = "ogooue lolo" if country == "gabon" & region == "ogooué-lolo"
		replace region = "ogooue maritime" if country == "gabon" & region == "ogooué maritime"

		* Ghana
		replace region = "ashanti" if country == "ghana" & region == "ashanti region"
		replace region = "brong ahafo" if country == "ghana" & inlist(region, "brong-ahafo", "brong ahafo region")
		replace region = "central" if country == "ghana" & region == "central region"
		replace region = "eastern" if country == "ghana" & region == "eastern region"
		replace region = "greater accra" if country == "ghana" & region == "greater accra region"
		replace region = "northern" if country == "ghana" & region == "northern region"
		replace region = "upper east" if country == "ghana" & region == "upper east region"
		replace region = "upper west" if country == "ghana" & region == "upper west region"
		replace region = "upper west & upper east & northern" if country == "ghana" & region == "upper w,e & northern"
		replace region = "volta" if country == "ghana" & region == "volta region"
		replace region = "western" if country == "ghana" & region == "western region"

		* Guinea
		replace region = "boke" if country == "guinea" & region == "boké"
		replace region = "kankan" if country == "guinea" & region == "upper guinea" // region changed, but not included in final database
		replace region = "kindia" if country == "guinea" & region == "lower guinea" // region changed, but not included in final database
		replace region = "labe" if country == "guinea" & region == "labé"
		replace region = "nzerekore" if country == "guinea" & region == "n'zérékoré"
		replace region = "nzerekore" if country == "guinea" & region == "forest guinea" // region changed, but not included in final database

		* Lesotho
		replace region = "butha buthe" if country == "lesotho" & inlist(region, "butha-bothe", "botha-bothe")
		replace region = "qachas nek" if country == "lesotho" & inlist(region, "qasha's nek", "qacha's-nek")

		* Liberia
		replace region = "south eastern a" if country == "liberia" & region == "grand gedeh" // region changed, but not included in final database

		* Madagascar
		replace region = "amoroni mania" if country == "madagascar" & region == "anamoroni'i mania"
		replace region = "atsinanana" if country == "madagascar" & region == "atsimo atsinanana"
		replace region = "vatovavy fitonany" if country == "madagascar" & region == "vatovavy fitovinany"

		* Malawi
		replace region = "central" if country == "malawi" & region == "central region"
		replace region = "northern" if country == "malawi" & inlist(region, "north", "northern region")
		replace region = "southern" if country == "malawi" & inlist(region, "south", "southern region")

		* Mali
		replace region = "kayes & koulikoro" if country == "mali" & region == "kayes,koulikoro"
		replace region = "mopti gao tombouctou" if country == "mali" & region == "mopti,gao,tomboucto"
		replace region = "sikasso segou" if country == "mali" & region == "sikasso,segou"
		replace region = "tombouctou" if country == "mali" & region == "toumbouctou"

		* Mozambique
		replace region = "maputo cidade" if country == "mozambique" & region == "cidade de maputo"
		replace region = "maputo province" if country == "mozambique" & inlist(region, "maputo provincia", "maputo")
		replace region = "zambezia" if country == "mozambique" & region == "zambzia"

		* Niger
		replace region = "tillaberi" if country == "niger" & region == "tillabéri"

		* Sao Tome and Principe
		replace region = "principe" if country == "sao tome and principe" & region == "região do principe" // region changed, but not included in final database
		replace region = "sao tome" if country == "sao tome and principe" & inlist(region, "região sul", "região norte", "região centro") // region changed, but not included in final database

		* Senegal
		replace region = "saint louis" if country == "senegal" & region == "saint-louis"
		replace region = "thies" if country == "senegal" & inlist(region, "thiès", "thi?s", "thiés")
		replace region = "ziguinchor" if country == "senegal" & region == "zuguinchor"

		* South Africa
		replace region = "kwazulu natal" if country == "south africa" & region == "kwazulu-natal"
		replace region = "western cape" if country == "south africa" & region == "western  cape"

		* Tanzania
		replace region = "coast" if country == "tanzania" & region == "coastal"
		replace region = "dar es salam" if country == "tanzania" & region == "dar es salaam"
		replace region = "north pemba" if country == "tanzania" & region == "pemba north"
		replace region = "rest of zanzibar" if country == "tanzania" & region == "rest zanzibar"
		replace region = "shinyanga" if country == "tanzania" & region == "shinyinga"
		replace region = "south pemba" if country == "tanzania" & region == "pemba south"
		replace region = "zanzibar south" if country == "tanzania" & region == "zanziba south"

		* Togo
		replace region = "kara" if country == "togo" & region == "de la kara"
		replace region = "lome" if country == "togo" & inlist(region, "grande agglomération de lomé", "lom")
		replace region = "maritime" if country == "togo" & inlist(region, "maritime (sans agglomération de lomé)", "marities")
		replace region = "plateaux" if country == "togo" & region == "des plateaux"
		replace region = "savanes" if country == "togo" & region == "des savanes"

		* Uganda
		replace region = "west nile" if country == "uganda" & region == "west-nile"

		* Zambia
		replace region = "north western" if country == "zambia" & inlist(region, "northwestern", "north-western")

		* Zimbabwe
		replace region = "harare chitungwiza" if country == "zimbabwe" & region == "harare /chitungwiza"
		* Second batch of spelling fixes (Benin to Gabon), grouped by country.
		* The "Ã©"/"Ãª" sequences are mis-encoded accented characters and are matched literally.

		* Benin
		replace region = "atlantic" if country == "benin" & region == "atlnatique"
		replace region = "oueme" if country == "benin" & region == "ouÃ©mÃ©"

		* Burkina Faso
		replace region = "boucle de mouhoun" if country == "burkina faso" & region == "boucle du mouhoun"
		replace region = "east" if country == "burkina faso" & region == "est"
		replace region = "hauts basins" if country == "burkina faso" & inlist(region, "hauts-bassins", "hauts bassins")
		replace region = "sud ouest" if country == "burkina faso" & region == "sud-ouest"

		* Burundi
		replace region = "bujumbura mairie" if country == "burundi" & region == "bujumbura marie"
		replace region = "cankuza" if country == "burundi" & region == "cankuzo"
		replace region = "ruyiga" if country == "burundi" & region == "ruyigi"

		* Cameroon
		replace region = "adamaoua" if country == "cameroon" & region == "adamawa"
		replace region = "centre" if country == "cameroon" & region == "centre-yaoundÃ©"
		replace region = "east" if country == "cameroon" & region == "est"
		replace region = "extreme nord" if country == "cameroon" & inlist(region, "extrÃªme-nord", "extreme-north")
		replace region = "nord ouest" if country == "cameroon" & region == "north west"
		replace region = "ouest" if country == "cameroon" & region == "west"

		* Gabon
		replace region = "haut ogooue" if country == "gabon" & region == "haut-ogoouÃ©"
		replace region = "moyen ogooue" if country == "gabon" & region == "moyen-ogoouÃ©"
		replace region = "ngounie" if country == "gabon" & region == "ngouniÃ©"
		replace region = "ogooue ivindo" if country == "gabon" & region == "ogoouÃ©-ivindo"
		replace region = "ogooue lolo" if country == "gabon" & region == "ogoouÃ©-lolo"
		replace region = "ogooue maritime" if country == "gabon" & region == "ogoouÃ©-maritime"
		replace region = "woleu ntem" if country == "gabon" & region == "woleu-ntem"
		replace region="boke" if region=="bokÃ©" & country=="guinea"
		replace region="labe" if region=="labÃ©" & country=="guinea"
		replace region="nzerekore" if region=="nâzÃ©rÃ©korÃ©" & country=="guinea"
		replace region="nzerekore" if region=="nzerekorÃ©" & country=="guinea"
		replace region="rift valley" if region=="rift-valley" & country=="kenya"
		replace region="butha buthe" if region=="butha-buthe" & country=="lesotho"
		replace region="butha buthe" if region=="buthe-buthe" & country=="lesotho"
		replace region="mohale hoek" if region=="mohaleâs hoek" & country=="lesotho"
		replace region="mohale hoek" if region=="mohale's hoek" & country=="lesotho"
		replace region="mohale hoek" if region=="mohale'hoek" & country=="lesotho"
		replace region="qachas nek" if region=="qachaâs nek" & country=="lesotho"
		replace region="qachas nek" if region=="qacha's nek" & country=="lesotho"
		replace region="thaba tseka" if region=="thaba-tseka" & country=="lesotho"
		* Second batch of spelling fixes, Liberia to Zimbabwe, then save the corrected names.
		* Each statement rewrites region only where country and the raw spelling both match.

		* Liberia: counties are collapsed into larger regions
		replace region="north central" if region=="nimba" & country=="liberia"
		replace region="north central" if region=="bong" & country=="liberia"
		replace region="north central" if region=="lofa" & country=="liberia"
		replace region="north western" if region=="grand cape mount" & country=="liberia"
		replace region="north western" if region=="bomi" & country=="liberia"
		replace region="north western" if region=="gbarpolu" & country=="liberia"
		replace region="south central" if region=="river cess" & country=="liberia"
		replace region="south central" if region=="margibi" & country=="liberia"
		replace region="south central" if region=="grand bassa" & country=="liberia"
		replace region="south central" if region=="montserrado" & country=="liberia"
		replace region="south central" if region=="rivercess" & country=="liberia"
		replace region="south eastern a" if region=="sinoe" & country=="liberia"
		replace region="south eastern b" if region=="river gee" & country=="liberia"
		replace region="south eastern b" if region=="grand kru" & country=="liberia"
		replace region="south eastern b" if region=="maryland" & country=="liberia"

		* Madagascar, Mali
		replace region="amoroni mania" if region=="amoron'i mania" & country=="madagascar"
		replace region="segou" if region=="sÃ©gou" & country=="mali"

		* Mozambique
		replace region="maputo cidade" if region=="maputo city" & country=="mozambique"
		replace region="maputo cidade" if region=="maputo (cid.)" & country=="mozambique"
		replace region="maputo province" if region=="maputo (prov.)" & country=="mozambique"
		replace region="zambezia" if region=="zambÃ©zia" & country=="mozambique"

		* Namibia: kavango west/east are pooled into a single kavango region
		replace region="karas" if region=="!karas" & country=="namibia"
		replace region="kavango" if region=="kavango west" & country=="namibia"
		replace region="kavango" if region=="kavango east" & country=="namibia"
		replace region="otjozondjupa" if region=="otjozundjupa" & country=="namibia"

		* Niger, Nigeria, Senegal
		replace region="tillaberi" if region=="tillabÃ©ri" & country=="niger"
		replace region="nasarawa" if region=="nassarawa" & country=="nigeria"
		replace region="kedougou" if region=="kÃ©dougou" & country=="senegal"
		replace region="sedhiou" if region=="sÃ©dhiou" & country=="senegal"

		* Sierra Leone
		replace region="eastern" if region=="eastern province" & country=="sierra leone"
		replace region="eastern" if region=="east" & country=="sierra leone"
		replace region="northern" if region=="northern province" & country=="sierra leone"
		replace region="northern" if region=="north" & country=="sierra leone"
		replace region="southern" if region=="southern province" & country=="sierra leone"
		replace region="southern" if region=="south" & country=="sierra leone"
		replace region="western" if region=="western area" & country=="sierra leone"

		* South Africa, Sudan
		replace region="mpumalanga" if region=="mpumulanga" & country=="south africa"
		replace region="eastern" if region=="east" & country=="sudan"
		replace region="khartoum" if region=="khartom" & country=="sudan"
		replace region="kordofan" if region=="kurdufan" & country=="sudan"

		* Tanzania
		replace region="coast" if region=="coast(pwani)" & country=="tanzania"
		replace region="dar es salam" if region=="dares salaam" & country=="tanzania"
		replace region="dar es salam" if region=="dar" & country=="tanzania"
		replace region="kilimanjaro" if region=="kilimajaro" & country=="tanzania"
		replace region="mtwara" if region=="mrwara" & country=="tanzania"
		replace region="north pemba" if region=="kaskazini pemba" & country=="tanzania"
		replace region="north pemba" if region=="pemba kaskazini" & country=="tanzania"
		replace region="north unguja" if region=="unguja kaskazini" & country=="tanzania"
		replace region="north unguja" if region=="kaskazini unguja" & country=="tanzania"
		replace region="south pemba" if region=="kusini pemba" & country=="tanzania"
		replace region="south pemba" if region=="pemba kusini" & country=="tanzania"
		* FIX: this rule used to map the misspelled "unfuja kusini" to the misspelled "south unfuja",
		* so it never produced the "south unguja" name used by the rule for "kusini unguja".
		* Both the correct and the misspelled raw value now end up as "south unguja".
		replace region="south unguja" if region=="unguja kusini" & country=="tanzania"
		replace region="south unguja" if region=="unfuja kusini" & country=="tanzania"
		replace region="south unguja" if region=="kusini unguja" & country=="tanzania"

		* Togo, Uganda, Zambia
		replace region="lome" if region=="lome commune" & country=="togo"
		replace region="lome" if region=="lomÃ© commune" & country=="togo"
		replace region="eastern" if region=="east" & country=="uganda"
		replace region="northern" if region=="north" & country=="uganda"
		replace region="western" if region=="west" & country=="uganda"
		replace region="copperbelt" if region=="copper belt" & country=="zambia"

		* Zimbabwe
		replace region="mashonaland central" if region=="mash central" & country=="zimbabwe"
		replace region="mashonaland east" if region=="mash east" & country=="zimbabwe"
		replace region="mashonaland west" if region=="mash west" & country=="zimbabwe"
		replace region="matabeleland north" if region=="matebeleland north" & country=="zimbabwe"
		replace region="matabeleland north" if region=="matebeland north" & country=="zimbabwe"
		replace region="matabeleland south" if region=="matebeland south" & country=="zimbabwe"
		replace region="matabeleland south" if region=="matebeleland south" & country=="zimbabwe"
		replace region="midlands" if region=="midland" & country=="zimbabwe"

		* repair the mis-encoded country name (country is a merge key further down)
		replace country="côte d'ivoire" if country=="cÃ´te d'ivoire"

		* store the harmonised names as region_corr, one file per AB round
		rename region region_corr
		sort respno
		save "$temp/r`x'_3.dta",replace
		}
		
		
		* Merge the file with corrected region names back onto each round's *_trust.dta.
		* Every respondent is expected to match; assert(match) stops the script with an
		* error if that ever fails, instead of silently discarding the _merge indicator.
		* merge 1:1 leaves the data sorted on the key variables (respno country).
		forvalues x=2(1)7{
			use "$temp/r`x'_trust.dta", clear
			merge 1:1 respno country using "$temp/r`x'_3.dta", assert(match) nogenerate
			save "$temp/r`x'_trust2.dta",replace
		}

	 

	******************************************************************************************************
	/*B3)- Prepare AB database
		 - Questions and question nr. are different across AB rounds! 
		 - run loop to rename variables in various rounds so that they match
		 - then append AB database, collapse vars of interest to country region year level
	*/	
	******************************************************************************************************
		
		* Harmonise variable names across Afrobarometer rounds 2 to 7: each item is
		* located through its variable label and renamed to one common name per item.
		forvalues x = 2/7 {
			use "$temp/r`x'_trust2.dta", clear

			* --- items renamed in every round ---
			ds, has(varl "*improving basic health*")
			rename (`r(varlist)') health_improving

			ds, has(varl "*Health Clinic*")
			rename (`r(varlist)') health_clinic

			ds, has(varl *President* *president*)
			rename (`r(varlist)') trust_president

			ds, has(varl *Parliament* *parliament*)
			rename (`r(varlist)') trust_parliament

			ds, has(varl "*Electoral Commission*" "*electoral commission*")
			rename (`r(varlist)') trust_electoral

			ds, has(varl "*local government*" "*local council*")
			rename (`r(varlist)') trust_locgov

			ds, has(varl "*ruling party*")
			rename (`r(varlist)') trust_party

			ds, has(varl "*opposition political*")
			rename (`r(varlist)') trust_opposition

			ds, has(varl "*police*")
			rename (`r(varlist)') trust_police

			ds, has(varl "*courts of law*")
			rename (`r(varlist)') trust_courts

			* --- items renamed only in some rounds ---

			* army / military: every round except 4
			if `x' != 4 {
				ds, has(varl *army* *military*)
				rename (`r(varlist)') trust_army
			}

			* "other people": rounds 4 and 5 only (not in 2, 3, 6, 7)
			if inlist(`x', 4, 5) {
				ds, has(varl "*other people*" )
				rename (`r(varlist)') trust_otherpeople
			}

			* "most people" and neighbours: rounds 3 and 5 only (not in 2, 4, 6, 7)
			if inlist(`x', 3, 5) {
				ds, has(varl "*Most people*" "*most people*")
				rename (`r(varlist)') trust_general

				ds, has(varl "*neighbors*" "*neighbours*")
				rename (`r(varlist)') trust_neighbors
			}

			* relatives: rounds 3, 4 and 5 (not in 2, 6, 7)
			if inlist(`x', 3, 4, 5) {
				ds, has(varl "*relatives*")
				rename (`r(varlist)') trust_relatives
			}

			* traditional leaders: rounds 2, 4, 6 and 7 (not in 3 and 5)
			if inlist(`x', 2, 4, 6, 7) {
				ds, has(varl "*traditional leaders*")
				rename (`r(varlist)') trust_trad
			}

			* religious leaders: rounds 6 and 7 only
			if inlist(`x', 6, 7) {
				ds, has(varl "*religious leaders*")
				rename (`r(varlist)') trust_religion
			}

			* keep the harmonised items plus identifiers, tag the round and store
			keep trust* health* respno dateintr country region* *wt
			gen round=`x'
			sort respno
			save "$abdata/r`x'_trust.dta",replace
		}
		
		* Stack the harmonised rounds: start from round 7, then append 6 down to 2
		use "$abdata/r7_trust.dta",clear
		forvalues k = 6(-1)2 {
			append using "$abdata/r`k'_trust.dta"
		}
 
 
	* Additional corrections to the region variable (region_corr) in the appended AB database.
	* These were discovered after the first attempts at merging AB and DHS.
		
	* Use the unaccented country spelling that the country conditions below test for
	replace country="cote d'ivoire" if country=="côte d'ivoire" 
	
	* Note that below we only keep countries for which the regions matched between DHS & AB, 
	* dropping countries for which we had to aggregate regions in either DHS or AB
	
	* IVORY COAST: DHS only has higher level ADM levels (see excel file "ab_dhs_regions_corr_commands") 
	* Each AB region name is replaced by the name of the larger DHS region it is assigned to.
	* NOTE(review): several source names are matched in a mis-encoded form (e.g. "Ã©" sequences),
	*               presumably exactly as they appear in the AB files - confirm before "fixing" them.
	replace region_corr="ville d'abidjan" if region_corr=="abidjan" & country=="cote d'ivoire"
	replace region_corr="sud sans abidjan" if region_corr=="agneby" & country=="cote d'ivoire"
	replace region_corr="sud sans abidjan" if region_corr=="agneby-tiassa" & country=="cote d'ivoire"
	replace region_corr="nord-ouest" if region_corr=="bafing" & country=="cote d'ivoire"
	* NOTE(review): the next line repeats the previous one and therefore changes nothing
	replace region_corr="nord-ouest" if region_corr=="bafing" & country=="cote d'ivoire"
	replace region_corr="nord" if region_corr=="bagouÃ©" & country=="cote d'ivoire"
	replace region_corr="sud-ouest" if region_corr=="bas sassandra" & country=="cote d'ivoire"
	replace region_corr="nord-est" if region_corr=="bounkani" & country=="cote d'ivoire"
	replace region_corr="centre" if region_corr=="bÃ©lier" & country=="cote d'ivoire"
	replace region_corr="nord-ouest" if region_corr=="bÃ©rÃ©" & country=="cote d'ivoire"
	replace region_corr="ouest" if region_corr=="cavally" & country=="cote d'ivoire"
	replace region_corr="nord-ouest" if region_corr=="denguele" & country=="cote d'ivoire"
	replace region_corr="centre-ouest" if region_corr=="fromager" & country=="cote d'ivoire"
	replace region_corr="sud-ouest" if region_corr=="gboklÃ¨" & country=="cote d'ivoire"
	replace region_corr="sud-ouest" if region_corr=="gbÃ©kÃ¨" & country=="cote d'ivoire"
	replace region_corr="nord-est" if region_corr=="gontougo" & country=="cote d'ivoire"
	replace region_corr="sud sans abidjan" if region_corr=="grands ponts" & country=="cote d'ivoire"
	replace region_corr="ouest" if region_corr=="guÃ©mon" & country=="cote d'ivoire"
	replace region_corr="sud sans abidjan" if region_corr=="gÃ´h" & country=="cote d'ivoire"
	replace region_corr="centre-nord" if region_corr=="hambol" & country=="cote d'ivoire"
	replace region_corr="centre-ouest" if region_corr=="haut sassandra" & country=="cote d'ivoire"
	* NOTE(review): the next line repeats the previous one and therefore changes nothing
	replace region_corr="centre-ouest" if region_corr=="haut sassandra" & country=="cote d'ivoire"
	* NOTE(review): "iffou" is blanked instead of being assigned to a DHS region - confirm this is intended
	replace region_corr="" if region_corr=="iffou" & country=="cote d'ivoire"
	replace region_corr="centre-est" if region_corr=="indeniÃ©-djuablin" & country=="cote d'ivoire"
	replace region_corr="nord-ouest" if region_corr=="kabadougou" & country=="cote d'ivoire"
	replace region_corr="sud sans abidjan" if region_corr=="la mÃ©" & country=="cote d'ivoire"
	replace region_corr="centre" if region_corr=="lacs" & country=="cote d'ivoire"
	replace region_corr="sud sans abidjan" if region_corr=="lagunes" & country=="cote d'ivoire"
	replace region_corr="sud sans abidjan" if region_corr=="lÃ´h-djiboua" & country=="cote d'ivoire"
	replace region_corr="centre-ouest" if region_corr=="marahoue" & country=="cote d'ivoire"
	replace region_corr="centre-ouest" if region_corr=="marahouÃ©" & country=="cote d'ivoire"
	replace region_corr="ouest" if region_corr=="montagnes" & country=="cote d'ivoire"
	replace region_corr="centre" if region_corr=="moronou" & country=="cote d'ivoire"
	replace region_corr="ouest" if region_corr=="moyen cavally" & country=="cote d'ivoire"
	replace region_corr="sud sans abidjan" if region_corr=="moyen comoe" & country=="cote d'ivoire"
	replace region_corr="centre" if region_corr=="n'zi" & country=="cote d'ivoire"
	replace region_corr="sud-ouest" if region_corr=="nawa" & country=="cote d'ivoire"
	replace region_corr="sud sans abidjan" if region_corr=="nâzi comoe" & country=="cote d'ivoire"
	replace region_corr="nord" if region_corr=="poro" & country=="cote d'ivoire"
	replace region_corr="sud-ouest" if region_corr=="san pedro" & country=="cote d'ivoire"
	replace region_corr="nord" if region_corr=="savanes" & country=="cote d'ivoire"
	replace region_corr="centre-nord" if region_corr=="sud bandama" & country=="cote d'ivoire"
	replace region_corr="sud sans abidjan" if region_corr=="sud comoe" & country=="cote d'ivoire"
	replace region_corr="sud sans abidjan" if region_corr=="sud comoÃ©" & country=="cote d'ivoire"
	replace region_corr="nord" if region_corr=="tchologo" & country=="cote d'ivoire"
	replace region_corr="ouest" if region_corr=="tonkpi" & country=="cote d'ivoire"
	replace region_corr="centre-nord" if region_corr=="vallee du bandama" & country=="cote d'ivoire"
	replace region_corr="ville d'abidjan" if region_corr=="ville dâabidjan" & country=="cote d'ivoire"
	replace region_corr="nord-ouest" if region_corr=="worodougou" & country=="cote d'ivoire"
	* NOTE(review): the next line repeats the previous one and therefore changes nothing
	replace region_corr="nord-ouest" if region_corr=="worodougou" & country=="cote d'ivoire"
	replace region_corr="nord-est" if region_corr=="zanzan" & country=="cote d'ivoire"			
	
	* CAMEROON:
	* - "north" is renamed to "nord"
	* - yaounde is folded into centre (sub-region, not separate in all surveys)
	* - douala is folded into littoral (sub-region, not separate in all surveys)
	replace region_corr="nord" if region_corr=="north" & country=="cameroon"
	replace region_corr="centre" if region_corr=="yaounde" & country=="cameroon"
	replace region_corr="littoral" if inlist(region_corr,"littoral-douala","douala") & country=="cameroon"

	* NAMIBIA: spelling
	replace region_corr="khomas" if region_corr=="knoma" & country=="namibia"

	* TANZANIA: spelling of south unguja
	replace region_corr="south unguja" if region_corr=="south unfuja" & country=="tanzania"
	* Regions created in 2012 are kept at the level of the region they came from:
	* - Katavi (split from Rukwa) stays with Rukwa
	* - Njombe (split from Iringa) stays with Iringa
	* - Geita & Simiyu were formed from parts of Mwanza & Shinyanga: all four are put at Mwanza level
	replace region_corr="rukwa" if region_corr=="katavi" & country=="tanzania"
	replace region_corr="iringa" if region_corr=="njombe" & country=="tanzania"
	replace region_corr="mwanza" if inlist(region_corr,"simiyu","shinyanga","geita") & country=="tanzania"

	* ZIMBABWE: expand the abbreviated "mat north" / "mat south"
	foreach d in north south {
		replace region_corr="matabeleland `d'" if region_corr=="mat `d'" & country=="zimbabwe"
	}

	* KENYA: spelling
	replace region_corr="north eastern" if region_corr=="northeastern" & country=="kenya"
	
	* NIGERIA: DHS only has higher level ADM levels.
	* Every AB region name (and the two-letter codes nc, ne, nw, se, ss, sw) is replaced
	* by one of six zone names; one loop per target zone.
	foreach s in abia anambra ebonyi enugu imo se {
		replace region_corr="south east" if region_corr=="`s'" & country=="nigeria"
	}
	foreach s in adamawa bauchi borno gombe ne taraba yobe {
		replace region_corr="north east" if region_corr=="`s'" & country=="nigeria"
	}
	foreach s in akwa-ibom bayelsa cross-river delta edo rivers ss {
		replace region_corr="south south" if region_corr=="`s'" & country=="nigeria"
	}
	foreach s in benue fct kogi kwara nasarawa nc niger plateau {
		replace region_corr="north central" if region_corr=="`s'" & country=="nigeria"
	}
	foreach s in jigawa kaduna kano katsina kebbi nw sokoto zamfara {
		replace region_corr="north west" if region_corr=="`s'" & country=="nigeria"
	}
	foreach s in ekiti lagos ogun ondo osun oyo sw {
		replace region_corr="south west" if region_corr=="`s'" & country=="nigeria"
	}
	
	* SENEGAL: some administrative changes were only adopted later in AB than in DHS
	*   - sedhiou was part of kolda before
	*   - kedougou was part of tambacounda before
	*   - kaffrine was part of kaolack before
	*   - matam was part of saint louis before
	* Everything is kept at the most aggregate level, so that region_corr remains unchanged over time.
	replace region_corr="kolda" if region_corr=="sedhiou" & country=="senegal" 
	replace region_corr="tambacounda" if region_corr=="kedougou" & country=="senegal"
	replace region_corr="kaolack" if region_corr=="kaffrine" & country=="senegal" 
	replace region_corr="saint louis" if region_corr=="matam" & country=="senegal" 
	
	* ZAMBIA (region muchinga split off from northern - keep at aggregate level)
	replace region_corr="northern" if region_corr=="muchinga" & country=="zambia"		
	
	* Changes after adding round 7. These three replaces carry no country condition,
	* so they apply to any observation with the listed region name.
	* NOTE(review): the source names are matched in a mis-encoded form ("Ã©" sequences),
	*               presumably as they appear in the round 7 data - confirm.
	replace region_corr="nzerekore" if region_corr=="n'zÃ©rÃ©korÃ©"
	replace region_corr="extreme nord" if region_corr=="extreme-nord"
	replace region_corr="ogooue lolo" if region_corr=="ogoouÃ©_lolo"

 
 	* Indicate for each region / country the time when data collection was finished
	* year_f = latest interview year observed within each round x country x region cell
	gen year=year(dateintr)
	bysort round country region_corr:egen year_f=max(year)

	* Respondent-level indicators, averaged to region level further down.
	// note that value labels are different in round 2
	// in round 2: 0 'not at all' 1 'a little bit' 2 'a lot' 3 'a very great deal'
	// other rounds: 0 'not at all' 1 'just a little' 2 'somewhat' 3 'a lot'
	sum trust*
	local var "trust_president trust_parliament trust_electoral trust_locgov trust_party trust_opposition trust_police trust_army trust_courts trust_trad trust_religion trust_general trust_relatives trust_neighbors trust_otherpeople"
	foreach v of local var{
		* no<item>:  1 if the answer is 0, 0 for any other answer from 0 up to (not including) 9, missing otherwise
		gen no`v'=1 if `v'==0
		replace no`v'=0 if no`v'!=1 & `v'>=0 & `v'<9
		* yes<item>: 1 if the answer is 3, 0 for any other answer from 0 up to (not including) 9, missing otherwise
		gen yes`v'=1 if `v'==3
		replace yes`v'=0 if yes`v'!=1 & `v'>=0 & `v'<9
	}

	* health_improving: indicator for answer 1 and indicator for answer 4;
	* only answers above 0 and below 9 are treated as valid
	gen health_notimproving=1 if health_improving==1
	replace health_notimproving=0 if health_notimproving!=1 & health_improving>0 & health_improving<9
	gen health_yesimproving=1 if health_improving==4
	replace health_yesimproving=0 if health_yesimproving!=1 & health_improving>0 & health_improving<9

	* health_clinic is kept only for answers from 0 up to (not including) 9
	gen clinic=health_clinic if health_clinic>=0 & health_clinic<9
	* counter that is summed (unweighted) into the number of AB respondents per region
	gen ab_n=1

	* weights: the within-country weight is stored as withinwt or as Withinwt depending on the round.
	* Summarise the weight variables in the log (instead of opening the interactive data browser,
	* so that the do-file can run unattended), then fill withinwt from Withinwt.
	summarize *wt
	replace withinwt=Withinwt if withinwt==.

	* collapse to round x country x region
	* year_f is constant within each cell, so (max) returns the same year as a weighted mean would,
	* but as an exact integer: a weighted mean of a constant can pick up floating-point noise,
	* which would distort the later comparison "yob>year_f+3".
	collapse (mean) notrust* yestrust* clinic health_*improving (max) year_f (rawsum) ab_n [iw=withinwt], by(round country region_corr)
	label var  ab_n "number of Afrobarometer observations from this region"

	* only the "no trust" shares, clinic, year_f and ab_n are kept
	drop yestrust_president-yestrust_otherpeople health_improving-health_yesimproving
	sort round country region_corr
	save "$datacons/r2to7_trust_collapsed.dta",replace
 
 


******************************************************************************************************
/*B4)- Now we want to match DHS with AB, by YOB of the kids
	- we have 6 rounds of AB to merge with
	- 2:2002-2004
	- 3:2005-2006
	- 4:2008-2009
	- 5:2011-2013
	- 6:2014-2015
	- 7:2016-2018 
*/	
******************************************************************************************************

	* Inspection only: show, for each country x region x round, the year in which
	* AB data collection was finished. The listing goes to the log (instead of the
	* interactive data browser) so the do-file can run unattended.
	use "$datacons/r2to7_trust_collapsed.dta",clear	
		tab round year_f
		bysort country region_corr round:gen x=_n
		list country region_corr round year_f if x==1, noobs sepby(country)
	

	use "$datacons/vaccination_data_dhs3.dta",clear

	* Drop children born before AB surveys took place in their country
	* (see excel file ab_country_rounds). The cut-off year depends on the country.
	drop if yob<2003

	foreach c in benin madagascar {
		drop if yob<2005 & country=="`c'"
	}

	foreach c in "burkina faso" liberia {
		drop if yob<2008 & country=="`c'"
	}

	* sudan (all before 2003) and swaziland: all obs dropped
	foreach c in burundi cameroon "cote d'ivoire" guinea niger "sierra leone" sudan swaziland togo {
		drop if yob<2011 & country=="`c'"
	}

	* gabon and sao tome and principe: all obs dropped
	foreach c in gabon "sao tome and principe" {
		drop if yob<2014 & country=="`c'"
	}


	* Assign an AB round to each child from the year of birth:
	* before 2005 -> 2, before 2008 -> 3, before 2011 -> 4, before 2014 -> 5, before 2016 -> 6, otherwise 7
	gen round = cond(yob<2005, 2, cond(yob<2008, 3, cond(yob<2011, 4, cond(yob<2014, 5, cond(yob<2016, 6, 7)))))
	table round, c(mean yob min yob max yob)
	
	
	* some additional corrections required to region variable in DHS
	* these were discovered after first attempts to merge AB and DHS

	* UGANDA: AB only has higher level ADM levels (see excel file "ab_dhs_regions_corr_commands")
	* DHS sub-regions are mapped onto the four broad regions used here.
	foreach r in acholi karamoja lango "west nile" {
		replace region_corr="northern" if region_corr=="`r'" & country=="uganda"
	}
	foreach r in ankole bunyoro kigezi southwest tooro {
		replace region_corr="western" if region_corr=="`r'" & country=="uganda"
	}
	* "east central" is the DHS name for the Busoga sub-region, which this same block
	* assigns to "eastern"; it was previously sent to "central", which put those
	* observations in a different region than the identical area labelled "busoga".
	* NOTE(review): this changes which AB region these Ugandan observations merge with -
	*               re-check against the excel mapping file.
	foreach r in bugisu bukedi busoga teso "east central" {
		replace region_corr="eastern" if region_corr=="`r'" & country=="uganda"
	}
	foreach r in "central 1" "central 2" "north buganda" "south buganda" {
		replace region_corr="central" if region_corr=="`r'" & country=="uganda"
	}
	
	* IVORY COAST: small "centre-est" region not in AB for round 5, replace with larger neighboring centre region.
	* The country condition restricts the change to Ivory Coast; without it, a "centre-est"
	* region of any other country would be renamed as well.
	tab region_corr if region_corr=="centre-est" & round==5 & country=="cote d'ivoire"
	replace region_corr="centre" if region_corr=="centre-est" & round==5 & country=="cote d'ivoire"

	* BURUNDI: 'bujumbura rural' in DHS, but just 'bujumbura' in AB
	* Rumonge was part of bururi province before & not yet included in AB
	* burundi not included in round 7, so set those to merge with round 6
	replace region_corr="bujumbura" if region_corr=="bujumbura rural" & country=="burundi"
	replace region_corr="bururi" if region_corr=="rumonge" & country=="burundi"
	replace round=6 if round==7 & country=="burundi"

	* GUINEA: 'faranah' region not included in AB round 6, so replace with round 5 value
	* (round 7 observations of that region are sent to round 5 as well)
	replace round=5 if inlist(round,6,7) & region_corr=="faranah" & country=="guinea"

	* MALI: regions 'gao' 'kidal' 'tombouctou' not in AB 5 but they are in AB 4 so replace
	replace round=4 if round==5 & inlist(region_corr,"kidal","gao","tombouctou") & country=="mali"
		
	* SENEGAL: some administrative changes only adopted later in AB than in DHS
	*   sedhiou was part of kolda before // kedougou was part of tambacounda before
	*   kaffrine was part of kaolack before // matam was part of saint louis before
	* keep everything at most aggregate level, to make sure region_corr remains unchanged over time
	replace region_corr="kolda" if region_corr=="sedhiou" & country=="senegal"
	replace region_corr="tambacounda" if region_corr=="kedougou" & country=="senegal"
	replace region_corr="kaolack" if region_corr=="kaffrine" & country=="senegal"
	replace region_corr="saint louis" if region_corr=="matam" & country=="senegal"

	* TANZANIA
	* Njombe split from Iringa in 2012. Keep at aggregate Iringa level
	* Katavi split from Rukwa in 2012. Keep at aggregate Rukwa level
	* Parts of Mwanza & Shinyanga split to form Geita & Simiyu in 2012. Put all four at Mwanza level
	replace region_corr="rukwa" if region_corr=="katavi" & country=="tanzania"
	replace region_corr="iringa" if region_corr=="njombe" & country=="tanzania"
	replace region_corr="mwanza" if region_corr=="simiyu" & country=="tanzania"
	replace region_corr="mwanza" if region_corr=="shinyanga" & country=="tanzania"
	replace region_corr="mwanza" if region_corr=="geita" & country=="tanzania"

	* pwani: round 4 observations are matched to round 3 instead
	replace round=3 if round==4 & region_corr=="pwani" & country=="tanzania"
	* Zanzibar region names; restricted to Tanzania like every other replace in this section
	replace region_corr="north unguja" if region_corr=="zanzibar north" & country=="tanzania"
	replace region_corr="south unguja" if region_corr=="zanzibar south" & country=="tanzania"
	replace region_corr="urban west" if region_corr=="town west" & country=="tanzania"
	* mjini magharibi: round 4 observations are matched to round 2 instead
	replace round=2 if round==4 & region_corr=="mjini magharibi" & country=="tanzania"
	
	* UGANDA: for rounds before 4, kampala is counted as part of central
	replace region_corr="central" if region_corr=="kampala" & round<4 & country=="uganda"

	* ZAMBIA (region muchinga split off from northern - keep at aggregate level)
	replace region_corr="northern" if region_corr=="muchinga" & country=="zambia"

	* CAMEROON: rename to the region names used on the AB side;
	* yaounde is folded into centre and douala into littoral
	replace region_corr="adamaoua" if region_corr=="adamawa" & country=="cameroon"
	replace region_corr="centre" if inlist(region_corr,"centre (without yaounde)","yaounde") & country=="cameroon"
	replace region_corr="extreme nord" if region_corr=="far-north" & country=="cameroon"
	replace region_corr="littoral" if inlist(region_corr,"littoral (without douala)","douala") & country=="cameroon"
	replace region_corr="nord" if region_corr=="north" & country=="cameroon"
	replace region_corr="nord ouest" if region_corr=="north-west" & country=="cameroon"
	replace region_corr="sud" if region_corr=="south" & country=="cameroon"
	replace region_corr="sud ouest" if region_corr=="south-west" & country=="cameroon"
	replace region_corr="ouest" if region_corr=="west" & country=="cameroon"

	* KENYA: spelling
	replace region_corr="north eastern" if region_corr=="northeastern" & country=="kenya"

	* MADAGASCAR: AB only has higher level ADM levels for years that correspond to DHS.
	* Each DHS region is replaced by the larger unit it is assigned to; one loop per target.
	foreach r in "alaotra mangoro" analanjirofo atsinanana {
		replace region_corr="toamasina" if region_corr=="`r'" & country=="madagascar"
	}
	foreach r in "amoroni mania" anosy "haute matsiatra" ihorombe "vatovavy fitonany" {
		replace region_corr="fianarantsoa" if region_corr=="`r'" & country=="madagascar"
	}
	foreach r in analamanga bongolava itasy vakinankaratra {
		replace region_corr="antananarivo" if region_corr=="`r'" & country=="madagascar"
	}
	foreach r in androy "atsimo andrefana" menabe {
		replace region_corr="toliary" if region_corr=="`r'" & country=="madagascar"
	}
	foreach r in betsiboka boeny melaky sofia {
		replace region_corr="mahajanga" if region_corr=="`r'" & country=="madagascar"
	}
	foreach r in diana sava {
		replace region_corr="antsiranana" if region_corr=="`r'" & country=="madagascar"
	}
 
 
	* merge with AB: many DHS children per AB round x country x region cell
	sort round country region_corr
	merge m:1 round country region_corr using "$datacons/r2to7_trust_collapsed.dta"

	drop if country=="gabon" | country=="sao tome and principe" | country=="swaziland" | country=="sudan"

	* share of observations per country that found an AB match
	gen merged=1 if _merge==3
	replace merged=0 if merged!=1
	table country,c(mean merged)


	* Create var that indicates region
	egen idregion=group(country_id region_corr)

	* Create country X region X year indicator
	egen cregiony=group(country_id region_corr year)

	* Indicate countries for which I had to aggregate to higher admin level in AB or DHS.
	* One row per country x region is written to the log (instead of being opened in the
	* interactive data browser), so the do-file can run unattended.
	bysort country idregion:gen x=_n
	sort country idregion
	foreach c in madagascar "cote d'ivoire" uganda liberia nigeria {
		list country idregion region_corr _merge if country=="`c'" & x==1, noobs
	}

	gen aggregated=1 if country=="madagascar" | country=="nigeria" |  country=="cote d'ivoire"

	* since there is no perfect match of regions it is better to drop these countries
	drop if aggregated==1

	* Drop those that were not merged
	drop if _merge!=3
	drop _merge merged

	* Drop observations that don't have an AB round in 3 years before birth.
	* year_f indicates when AB data was collected in the region.
	drop if yob>year_f+3


	* check DHS surveys by country: tabulate the survey start years per country
	levelsof country, local(country)
	foreach ctry of local country {
		display "`ctry'"
		tabulate start_year if country=="`ctry'"
	}
	drop if country=="burundi" & start_year==2010 // only 8 obs. were merged

	* summarise outcome and control variables per country (country list re-read after the drop above)
	levelsof country, local(country)
	foreach ctry of local country {
		display "`ctry'"
		summarize y1_none sex age birthcat sex_head age_mother age_mother_1stbirth religion_mother literate school_mother school_father wealth nr_under5_w nr_hh_w urban visit_12m if country=="`ctry'"
	}
	// Tanzania, South-Africa and Niger: no info on religion mother!

	label data "DHS child recode merged with AB collapsed at region level"
	compress
	save "$datacons/DHS_ABtrust(2-7).dta",replace
		
		
		
	
******************************************************************************************************
*C) Final preparations
******************************************************************************************************

* De-normalized sampling weights are produced by a separate do-file
	do "$dofile/weight_trust vaccines_BMJ.do"


* Prepare the final analytical sample, main database

	use "$datacons/DHS_ABtrust(2-7).dta",clear

	** use_norel marks the main sample: observations with information on child, parents,
	** household controls and access to healthcare.
	// Tanzania, South Africa and Niger have no info on religion of the mother;
	// use_norel does not take religion of the mother into account, so they stay in.
		gen use_norel=1
		local vars "y1_none sex age birthcat sex_head age_mother age_mother_1stbirth literate school_mother school_father wealth nr_under5_w nr_hh_w urban visit_12m problem_permission problem_money problem_distance problem_alone"
		foreach ctrl of local vars {
			replace use_norel=. if `ctrl'==.
		}

	** keep only observations we need
	keep if use_norel==1

	** indicator for each AB region-round, plus a running number within it
	** (used to pick one observation per region-round for the pca)
	egen region_AByear=group(idregion round)
	bysort region_AByear: gen region_AByearx=_n

	** 'public mistrust' index: pca on one observation per region-round, score predicted for all
	pca notrust_president notrust_parliament notrust_electoral notrust_courts notrust_locgov if region_AByearx==1
	predict notrust_index
	label var notrust_index "mistrust in public institutions"

	** coefficients on the index are hard to interpret, so standardize to mean 0 and standard deviation 1
	egen znotrust_index_norel=std(notrust_index)
	label var znotrust_index_norel "mistrust in public institutions (standardized)"

	** cluster-level means of the access-to-healthcare variables (suffix m)
	local var "visit_12m problem_permission problem_money problem_distance problem_alone"
	foreach acc of local var {
		bysort country_cluster_year: egen `acc'm=mean(`acc')
	}

	** age in months and age cohort
	gen age_months=v008-b3

	gen age_c = cond(age_months>11 & age_months<24, 1, cond(age_months>23 & age_months<36, 2, cond(age_months>35 & age_months<48, 3, cond(age_months>47 & age_months<60, 4, .))))
	label var age_c "age cohort"
	label define age_c 1"12-23 months" 2"24-35 months" 3"36-47 months" 4"48-59 months"
	label values age_c age_c
	save "$datacons/DHS_ABtrust(2-7)_inclmissing.dta",replace

	
	** slim down the dataset to speed up regressions: keep outcomes, controls, identifiers and
	** every variable whose name contains country, region, year or trust
	keep y1* sex age birthcat sex_head age_mother age_mother_1stbirth literate school_mother school_father ///
		wealth nr_under5_w nr_hh_w urban ///
		visit_12m problem_permission problem_money problem_distance problem_alone ///
		yob religion_mother wgt aggregated *country* *region* *year* nomigrant *trust* ///
		visit_12mm problem_permissionm problem_moneym problem_distancem problem_alonem ///
		age_months age_c use_norel ///
		bcg polio1 polio2 polio3 measles dpt1 dpt2 dpt3 ///
		refuse_test round v101 cluster v002 v003 midx


	** get fancy weights (see do-file that is run above)
	* NOTE(review): the global $weight is not among the folder globals defined at the top of this do-file; confirm it is set before this line
	merge 1:1 cluster v002 v003 midx country_id start_year end_year using "$weight/fancy_weight.dta"
	* records that exist only in the weights file are not part of this sample
	drop if _merge==2 
	* consistency check, written to the log instead of opening the Data Editor (browse is interactive and leaves no record):
	* number of records whose weight differs from the copy stored in the weights file (unmatched records count as well, because wgt_check is missing for them)
	count if wgt!=wgt_check
	drop _merge wgt_check
	sum wgt fancy_wgt
	
	** gen cluster leave out mean test refusal variable
		* create survey-cluster average of test refusal
		sort country_cluster_year refuse_test
		* logged overview of the refusal variable (replaces an interactive browse call, which leaves nothing in the log)
		tab refuse_test, missing
		bysort country_cluster_year:egen mrefuse_test=mean(refuse_test)

		* second version, assuming everyone with missing information accepts test (so for sure lower bound)
		* (only filled in for clusters with at least one non-missing answer, i.e. where mrefuse_test is not missing)
		gen refuse_test2=refuse_test
		replace refuse_test2=0 if refuse_test==. & mrefuse_test!=.
		bysort country_cluster_year:egen mrefuse_test2=mean(refuse_test2)
		
		* third version, same as version 2, but leave-out mean:
		* (number of refusals in the cluster minus the observation's own value) / (cluster size minus one)
		bysort country_cluster_year: gen x=_N
		egen lm_refuse_test=total(refuse_test2==1), by (country_cluster_year)
		replace lm_refuse_test=lm_refuse_test-refuse_test2
		* clusters with a single observation divide by zero, which Stata stores as missing
		replace lm_refuse_test=lm_refuse_test/(x-1)
		label var lm_refuse_test "cluster-level refusal of blood test, leave-out mean"

	* gen SSA_region: map each country_id to one of four regions; ids not listed below stay missing
	gen ssa_region = .
	replace ssa_region = 1 if inlist(country_id, 4, 16, 20, 22, 34, 38, 36, 37)
	replace ssa_region = 2 if country_id == 5
	replace ssa_region = 3 if inlist(country_id, 2, 3, 14, 15, 18, 21, 24, 29, 30, 35)
	replace ssa_region = 4 if inlist(country_id, 17, 31, 23)
	
	* attach value labels to the region codes
	label define ssa_region 1 "east africa" 2 "central africa" 3 "west africa" 4 "southern africa"
	label values ssa_region ssa_region
	
	* keep only observations on children >11 months
	* NOTE(review): Stata treats missing as larger than any number, so a missing age_months would also pass this filter
	keep if age_months>11
			
	* re-standardize the institutional mistrust index on the restricted sample (check only; the variables created here are not kept below)
	* region_AByearx was numbered before the age restriction, so the observation numbered 1 may have been dropped
	* for some region-rounds; tag one remaining observation per region-round for the PCA instead
	tempvar one_per_region
	egen byte `one_per_region'=tag(region_AByear)
	pca notrust_president notrust_parliament notrust_electoral notrust_courts notrust_locgov if `one_per_region'==1 
	predict notrust_indexx
	egen znotrust_index_norelx=std(notrust_indexx)
	* the two versions of the index should be (almost) perfectly correlated
	pwcorr znotrust_index_norelx znotrust_index_norel,sig
					
	* the standardized mistrust index gets its final name
	rename znotrust_index_norel znotrust_index

	* reduce the dataset to the variables used in the analysis, listed by theme
	local k_ids       "year idregion country_year round country_id start_year end_year country countrycode ssa_region"
	local k_outcomes  "y1_none y1_full bcg polio1 polio2 polio3 measles dpt1 dpt2 dpt3"
	local k_trust     "notrust_president notrust_parliament notrust_electoral notrust_courts notrust_locgov znotrust_index notrust_index notrust_relatives notrust_general"
	local k_child     "sex birthcat age_mother age_mother_1stbirth literate school_mother religion_mother school_father age_c nomigrant"
	local k_household "sex_head wealth nr_under5_w nr_hh_w urban"
	local k_access    "visit_12m problem_permission problem_money problem_distance problem_alone visit_12mm problem_permissionm problem_moneym problem_distancem problem_alonem"
	local k_other     "wgt fancy_wgt refuse_test lm_refuse_test"
	keep `k_ids' `k_outcomes' `k_trust' `k_child' `k_household' `k_access' `k_other'
	
	* variable labels for the variables that do not have one yet
		* identifiers and survey timing
		label variable country "country"
		label variable countrycode "country code"
		label variable country_id "country id"
		label variable ssa_region "region in SSA"
		label variable start_year "year of start DHS survey"
		label variable end_year "year of end DHS survey"
		label variable round "AB round"

		* weights
		label variable wgt "DHS weights"
		label variable fancy_wgt "de-normalized sampling weights"

		* mistrust measures
		label variable notrust_president "mistrust in head of state"
		label variable notrust_parliament "mistrust in parliament"
		label variable notrust_electoral "mistrust in electoral system"
		label variable notrust_courts "mistrust in courts"
		label variable notrust_locgov "mistrust in local government"
		label variable notrust_general "mistrust in people in general"
		label variable notrust_relatives "mistrust in relatives"

		* mother characteristics
		label variable nomigrant "mother lived here since birth"
		label variable religion_mother "religion mother"

		* cluster-level access to healthcare
		label variable visit_12mm "DHS cluster mean that visited health facility last 12 months"
		label variable problem_permissionm "DHS cluster mean that find getting permission to get medical help a big problem"
		label variable problem_moneym "DHS cluster mean that find getting money for medical treatment a big problem"
		label variable problem_distancem "DHS cluster mean that find distance to health facility a big problem"
		label variable problem_alonem "DHS cluster mean that find not wanting to go alone a big hurdle"
		
	* column order of the final dataset, assembled group by group
	local o_ids      "country countrycode country_id ssa_region year start_year end_year round"
	local o_vaccines "y1_full y1_none bcg dpt1 dpt2 dpt3 polio1 polio2 polio3 measles"
	local o_controls "sex birthcat age_mother age_mother_1stbirth literate school_mother religion_mother school_father sex_head wealth nr_under5_w nr_hh_w urban age_c"
	local o_access   "visit_12m problem_permission problem_money problem_distance problem_alone visit_12mm problem_permissionm problem_moneym problem_distancem problem_alonem"
	local o_trust    "notrust_index znotrust_index notrust_president notrust_parliament notrust_electoral notrust_locgov notrust_courts notrust_general notrust_relatives"
	local o_rest     "refuse_test lm_refuse_test nomigrant country_year idregion wgt fancy_wgt"
	order `o_ids' `o_vaccines' `o_controls' `o_access' `o_trust' `o_rest'

	* write the final analytical dataset
	save "$datacons/DHS_AB_BMJ.dta", replace

	
	
* database looking at missing vaccination information	

	* start again from the version saved before redundant variables were dropped
	use "$datacons/DHS_ABtrust(2-7)_inclmissing.dta",clear	
	keep if age_months>11

	* gen var indicating missing vaccination information:
	* missing_vacc stays 1 only when all control variables are observed (the list below is the use_norel list without y1_none)
	gen missing_vacc=1
	local vars "sex age birthcat sex_head age_mother age_mother_1stbirth literate school_mother school_father wealth nr_under5_w nr_hh_w urban visit_12m problem_permission problem_money problem_distance problem_alone"
	foreach v of local vars{
	replace missing_vacc=. if `v'==.
	}
		
	* drop the obs. for which other control vars are missing
	* (variable name written in full: the abbreviation missing_vac only works while variable abbreviation is switched on)
	drop if use_norel==. & missing_vacc==. 
	
	* there are 41,747 obs. with info on all control vars, apart from vaccination
	tab y1_none if missing_vacc==1 & use_norel==.
	count if missing_vacc==1 & use_norel==.
	
	* indicate analytical sample (1) & the sample with missing info (0)
	gen sample=1 if use_norel==1
	replace sample=0 if use_norel==. & missing_vacc==1
	
	* From phase 7, only children born in 3 years preceding the survey were asked information on vaccination			
	tab year phase if year<2013
	tab year phase if year>2012
	* the third character of the phase string is converted to a number
	gen phasex=substr(phase,3,1)
	destring phasex,replace
	* phase7: 1 for phase 7, 0 for phases 4-6 (any other value is left unchanged by recode)
	gen phase7=phasex
	recode phase7 (4=0) (5=0) (6=0) (7=1)
	
	* sample2: as sample, but phase-7 children older than 35 months outside the analytical sample are set to missing
	gen sample2=sample
	replace sample2=. if phase7==1 & age_months>35 & sample!=1

	* mean of sample2 per country-year
	* (tabstat replaces "table ..., c(mean ...)": that syntax predates Stata 17 and needs version control on newer releases)
	tabstat sample2, by(country_year) statistics(mean) nototal
	
	* notsample2 is the mirror image of sample2: 1 = vaccination info missing, 0 = analytical sample
	gen notsample2=sample2
	recode notsample2 (0=1) (1=0)
	tabstat notsample2, by(country_year) statistics(mean) nototal
	sum notsample2			

	* keep only necessary vars
	* (the standardized index is still called znotrust_index_norel at this point, so it is kept under that name
	*  instead of relying on variable abbreviation; it is renamed right below)
	keep country_year notsample2 sex birthcat age_mother age_mother_1stbirth literate school_mother school_father y1_none y1_full ///
	sex_head wealth nr_under5_w nr_hh_w urban visit_12m problem_permission problem_money problem_distance problem_alone ///
	visit_12mm problem_permissionm problem_moneym problem_distancem problem_alonem year notrust_index znotrust_index_norel idregion age_c 

	* rename some variables
	rename znotrust_index_norel znotrust_index
	rename notsample2 vacc_missing
	
	* label remaining vars
	label var visit_12mm "DHS cluster mean that visited health facility last 12 months"
	label var problem_permissionm "DHS cluster mean that find getting permission to get medical help a big problem"
	label var problem_moneym "DHS cluster mean that find getting money for medical treatment a big problem"
	label var problem_distancem "DHS cluster mean that find distance to health facility a big problem"
	label var problem_alonem "DHS cluster mean that find not wanting to go alone a big hurdle"
	label var vacc_missing "missing info on vaccination"
	
	* order variables
	* (country itself is not among the kept variables; the original "country" could only resolve to country_year
	*  through abbreviation, so country_year is named explicitly and listed once, in first position)
	order country_year y1_full y1_none sex birthcat age_mother age_mother_1stbirth literate school_mother school_father sex_head wealth nr_under5_w nr_hh_w urban ///
	visit_12m problem_permission problem_money problem_distance problem_alone visit_12mm problem_permissionm problem_moneym problem_distancem problem_alonem ///
	notrust_index znotrust_index idregion			
	
	* label database & compress
	label data "DHS - AB database, to look at missing vaccination info"
	compress
	
	save "$datacons/DHS_AB_missing_BMJ.dta",replace
			
			
* database for maps	
	* created by only keeping info on vaccination coverage & mistrust in institutions from most recent DHS & AB survey in each country
	* then used QGIS to create the maps
		
		
		
*** END ***	
